includes:
in_header: preamble.tex
library(ggplot2)
library(plm)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plm':
##
## between, lag, lead
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
library(writexl)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(readr)
library(nlme)
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
library(stringr)
library(broom)
library(tidyr)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(PerformanceAnalytics)
## Loading required package: xts
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
library(Metrics)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(glue)
EFPIA/ Europe
EFPIA R&D spending by country by year in euros
From yearly R&D reports 2004: Link Removed. Screenshot can be
found in original data folder 2005: https://www.efpia.eu/media/15485/the-pharmaceutical-industry-in-figures-edition-2007.pdf
2006: https://www.efpia.eu/media/15486/the-pharmaceutical-industry-in-figures-edition-2008.pdf
2007: https://www.pharmine.eu/wp-content/uploads/2014/04/The_Pharma_Industry_in_figures_Edition_2009-20080612-009-EN-v1.pdf
2008: https://www.sfee.gr/wp-content/uploads/2015/05/Figures_2010_Final-20100611-001-EN-v1_0.pdf
2009: https://www.efpia.eu/media/15489/the-pharmaceutical-industry-in-figures-edition-2011.pdf
2010: https://www.sfee.gr/wp-content/uploads/2015/05/EFPIA_Figures_2012_Final.pdf
Data for 2011 could not be found 2012: https://www.phar-in.eu/wp-content/uploads/2014/05/Figures_2014_Final.pdf
2013: https://www.efpia.eu/media/25822/2015-the-pharmaceutical-industry-in-figures.pdf
2014: https://www.efpia.eu/media/25055/the-pharmaceutical-industry-in-figures-june-2016.pdf
2015: https://www.efpia.eu/media/219735/efpia-pharmafigures2017_statisticbroch_v04-final.pdf
2016: https://www.efpia.eu/media/361960/efpia-pharmafigures2018_v07-hq.pdf
2017: https://www.efpia.eu/media/412931/the-pharmaceutical-industry-in-figures-2019.pdf
2018: https://www.efpia.eu/media/554521/efpia_pharmafigures_2020_web.pdf
2019: https://www.efpia.eu/media/602709/the-pharmaceutical-industry-in-figures-2021.pdf
2020: https://www.efpia.eu/media/637143/the-pharmaceutical-industry-in-figures-2022.pdf
2021: https://www.efpia.eu/media/rm4kzdlx/the-pharmaceutical-industry-in-figures-2023.pdf
efpia_rd <- read.csv("Cleaned Data/Area Level R&D Spending/EFPIA R&D Spending by Country in EU.csv")
efpia_rd <- clean_names(efpia_rd)
efpia_rd
formatting
efpia_rd$r_d_spending_millions_euros <- gsub(",", "", efpia_rd$r_d_spending_millions_euros)
efpia_rd$r_d_spending_millions_euros <- as.numeric(efpia_rd$r_d_spending_millions_euros)
## Warning: NAs introduced by coercion
efpia_rd
sum total EU by year
efpia_rd_totals <- efpia_rd %>%
group_by(year) %>%
summarise(total_mil_euros = sum(r_d_spending_millions_euros, na.rm = TRUE))
efpia_rd_totals
Imputing 2011 with average between 2011 and 2012 efpia_rd_totals
rd_2012 <- efpia_rd_totals[efpia_rd_totals$year == 2012, ]$total_mil_euros
rd_2010 <- efpia_rd_totals[efpia_rd_totals$year == 2010, ]$total_mil_euros
avg_2010_2012 <- (rd_2010 + rd_2012) / 2
efpia_rd_totals <- rbind(efpia_rd_totals, c(2011, avg_2010_2012))
efpia_rd_totals <- efpia_rd_totals[order(efpia_rd_totals$year),]
efpia_rd_totals
Dollars to euros https://www.macrotrends.net/2548/euro-dollar-exchange-rate-historical-chart
usdtoeuroavgconversion <- read_xlsx("Original Data/USD to Euro Conversion Avg by Year.xlsx")
usdtoeuroavgconversion
eurotodollars <- as.data.frame(cbind(usdtoeuroavgconversion$Year, 1/usdtoeuroavgconversion$`Average Closing Price`))
colnames(eurotodollars) <- c("year", "euros_to_dollar_conversion")
eurotodollars
multiply conversion rate to get r&d spending in dollars
rd_conversion_merged_df <- merge(efpia_rd_totals, eurotodollars, by = "year")
rd_conversion_merged_df
efpia_rd_totals$total_rd_mil_dollars <- rd_conversion_merged_df$euros_to_dollar_conversion * rd_conversion_merged_df$total_mil_euros
efpia_rd_totals$annual_domestic_RD_spending_bil_dollars <- efpia_rd_totals$total_rd_mil_dollars /1000
efpia_rd_totals
EU orphan drug market authorizations
EU Orphan Drugs were scraped from a pdf https://www.orpha.net/pdfs/orphacom/cahiers/docs/GB/list_of_orphan_drugs_in_europe.pdf
it can be found in the the Original Data Folder
eu_orphan_drugs <- read_xlsx("Cleaned Data/Area Level Orphan Authorizations/EU_Orphan_w_Company from https___www.orpha.net_orphacom_cahiers_docs_GB_list_of_orphan_drugs_in_europe.pdf.xlsx")
eu_orphan_drugs <- clean_names(eu_orphan_drugs)
eu_orphan_drugs
Check for duplicated rows
eu_orphan_drugs[duplicated(eu_orphan_drugs),]
EU number of market authorizations for each drug
eu_ma_freq <- eu_orphan_drugs %>%
count(tradename) %>%
arrange(desc(n))
colnames(eu_ma_freq) <- c("Generic Name", "Market Authorization Frequency")
eu_ma_freq$`Generic Name` <- tolower(eu_ma_freq$`Generic Name`)
eu_ma_freq
How many ODs with more than 1 authorization
nrow(eu_ma_freq[eu_ma_freq$`Market Authorization Frequency` > 1, ])
## [1] 3
stargazer(eu_ma_freq[1:3,], title= "EU Market Authorization Frequencies for Orphan Drugs with Over 1 Market Authorization", rownames = FALSE, align=TRUE, summary=FALSE, type = "html", out="Final Figures Formatted/EU_OD_Multiple_MAs.htm")
##
## <table style="text-align:center"><caption><strong>EU Market Authorization Frequencies for Orphan Drugs with Over 1 Market Authorization</strong></caption>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td>Generic Name</td><td>Market Authorization Frequency</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td>carbaglu</td><td>2</td></tr>
## <tr><td>nexavar</td><td>2</td></tr>
## <tr><td>soliris</td><td>2</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr></table>
Look at where there are multiple market authorizations for the same
drug
eu_orphan_drugs[eu_orphan_drugs$tradename == "CARBAGLU",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "NEXAVAR",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "SOLIRIS",]
Select only new drugs in the EU
eu_orphan_drugs_new <- eu_orphan_drugs %>%
group_by(tradename) %>%
slice_min(order_by = market_authorization_year, n = 1, with_ties = FALSE) %>% # Select the entry with the earliest market authorization year
ungroup()
eu_orphan_drugs_new
format new EU orphan drugs by year
eu_new_od_ma_freqs <- as.data.frame(table(eu_orphan_drugs_new$market_authorization_year))
colnames(eu_new_od_ma_freqs) <- c("Year", "Freq New Orphan Drug MAs")
eu_new_od_ma_freqs
eu_new_od_ma_freqs$Year <- as.character(eu_new_od_ma_freqs$Year)
eu_new_od_ma_freqs$Year <- as.numeric(eu_new_od_ma_freqs$Year)
eu_new_od_ma_freqs <- eu_new_od_ma_freqs[order(-eu_new_od_ma_freqs$Year),]
head(eu_new_od_ma_freqs)
Select all MAs in the EU
format EU market authorizations by year
eu_od_ma_freqs <- as.data.frame(table(eu_orphan_drugs$market_authorization_year))
colnames(eu_od_ma_freqs) <- c("Year", "Freq Orphan Drug MAs")
eu_od_ma_freqs
eu_od_ma_freqs$Year <- as.character(eu_od_ma_freqs$Year)
eu_od_ma_freqs$Year <- as.numeric(eu_od_ma_freqs$Year)
eu_od_ma_freqs <- eu_od_ma_freqs[order(-eu_od_ma_freqs$Year),]
head(eu_od_ma_freqs)
eu_od_ma_freqs <- merge(eu_od_ma_freqs, eu_new_od_ma_freqs, by = "Year")
eu_od_ma_freqs
Graph of EU Market Authorizations orphan drugs
ggplot(eu_od_ma_freqs, aes(x = Year)) +
geom_line(aes(y = `Freq Orphan Drug MAs`, color="Orphan Drug MAs")) +
geom_line(aes(y = `Freq New Orphan Drug MAs`, color="New Orphan Drug MAs")) +
labs(x = "Year",
y = str_wrap("Annual Number of Market Authorizations in the EU", width=40),
title=str_wrap("Annual Number of Orphan Drug Market Authorizations in the EU", width=50)
) +
scale_color_manual(name = "Color", values = c("darkred", "dodgerblue"))+
theme_minimal()

merge orphan drug and R&D spending tables
colnames(efpia_rd_totals) <- c("Year", "total_rd_euros", "total_rd_mil_dollars" , "total_rd_bil_dollars")
eu_rd_mas <- merge(efpia_rd_totals,eu_od_ma_freqs,by="Year")
eu_rd_mas <- clean_names(eu_rd_mas)
eu_rd_mas
PhRMA/ United States
US orphan drug Market Authorizations
https://www.accessdata.fda.gov/scripts/opdlisting/oopd/
Only approved products
us_orphan_drugs <- read.csv("Original Data/Area Level Orphan Authorizations/US_FDA_OD.csv")
us_orphan_drugs <- clean_names(us_orphan_drugs)
us_orphan_drugs <- us_orphan_drugs[!is.na(us_orphan_drugs$marketing_approval_date), ]
us_orphan_drugs$marketing_approval_date <- as.Date(us_orphan_drugs$marketing_approval_date, format="%m/%d/%y")
us_orphan_drugs[order(us_orphan_drugs$marketing_approval_date),]
Check for duplicated rows Remove one row because duplicated
us_orphan_drugs[duplicated(us_orphan_drugs),]
us_orphan_drugs[us_orphan_drugs$generic_name == "tacrolimus" ,]
us_orphan_drugs <- us_orphan_drugs[-1119, ]
us_orphan_drugs[duplicated(us_orphan_drugs),]
Number of designations per product
us_ma_freq <- us_orphan_drugs %>%
count(generic_name) %>%
arrange(desc(n))
colnames(us_ma_freq) <- c("Generic Name", "Market Authorization Frequency")
us_ma_freq
How many ODs with more than 1 authorization
nrow(us_ma_freq[us_ma_freq$`Market Authorization Frequency` > 1, ])
## [1] 223
stargazer(us_ma_freq[1:19,], title= "US Market Authorization Frequencies for Orphan Drugs with Over 5 Market Authorizations", rownames = FALSE, align=TRUE, summary=FALSE, type = "html", out="Final Figures Formatted/US_OD_Multiple_MAs.htm")
##
## <table style="text-align:center"><caption><strong>US Market Authorization Frequencies for Orphan Drugs with Over 5 Market Authorizations</strong></caption>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td>Generic Name</td><td>Market Authorization Frequency</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td>nivolumab</td><td>16</td></tr>
## <tr><td>pembrolizumab</td><td>16</td></tr>
## <tr><td>bevacizumab</td><td>11</td></tr>
## <tr><td>ibrutinib</td><td>11</td></tr>
## <tr><td>ivacaftor</td><td>11</td></tr>
## <tr><td>olaparib</td><td>11</td></tr>
## <tr><td>lenalidomide</td><td>9</td></tr>
## <tr><td>lisocabtagene maraleucel</td><td>9</td></tr>
## <tr><td>adalimumab</td><td>8</td></tr>
## <tr><td>axicabtagene ciloleucel</td><td>7</td></tr>
## <tr><td>brentuximab vedotin</td><td>7</td></tr>
## <tr><td>crizotinib</td><td>7</td></tr>
## <tr><td>daratumumab</td><td>7</td></tr>
## <tr><td>ravulizumab-cwvz</td><td>7</td></tr>
## <tr><td>zanubrutinib</td><td>7</td></tr>
## <tr><td>canakinumab</td><td>6</td></tr>
## <tr><td>isavuconazonium sulfate</td><td>6</td></tr>
## <tr><td>rituximab</td><td>6</td></tr>
## <tr><td>selpercatinib</td><td>6</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr></table>
Select only new drugs in the US
us_orphan_drugs_new <- us_orphan_drugs %>%
group_by(generic_name) %>%
slice_min(order_by = marketing_approval_date, n = 1, with_ties = FALSE) %>% # Select the entry with the earliest market authorization year
ungroup()
us_orphan_drugs_new
us_orphan_drugs_new$Marketing.year <- year(as.Date(us_orphan_drugs_new$marketing_approval_date))
us_new_od_ma_freqs <- as.data.frame(table(us_orphan_drugs_new$Marketing.year))
colnames(us_new_od_ma_freqs) <- c("Year", "Freq New Orphan Drug MAs")
us_new_od_ma_freqs
Make sure matches the “Number of Orphan Drug Approvals: 1262” from https://www.accessdata.fda.gov/scripts/opdlisting/oopd/listResult.cfm
on 9/29/2024
nrow(us_orphan_drugs[year(as.Date(us_orphan_drugs$marketing_approval_date)), ])
## [1] 1262
Select all MAs in the US format US MA date as year
us_orphan_drugs$Marketing.year <- year(as.Date(us_orphan_drugs$marketing_approval_date))
us_od_ma_freqs <- as.data.frame(table(us_orphan_drugs$Marketing.year))
colnames(us_od_ma_freqs) <- c("Year", "Freq Orphan Drug MAs")
us_od_ma_freqs$Year <- as.character(us_od_ma_freqs$Year)
us_od_ma_freqs$Year <- as.numeric(us_od_ma_freqs$Year)
us_od_ma_freqs <- us_od_ma_freqs[order(-us_od_ma_freqs$Year),]
us_od_ma_freqs <- merge(us_od_ma_freqs, us_new_od_ma_freqs, by = "Year")
us_od_ma_freqs
Graph of US Market Authorizations orphan drugs
ggplot(us_od_ma_freqs, aes(x = Year)) +
geom_line(aes(y = `Freq Orphan Drug MAs`, color="Orphan Drug MAs")) +
geom_line(aes(y = `Freq New Orphan Drug MAs`, color="New Orphan Drug MAs")) +
labs(x = "Year",
y = str_wrap("Annual Number of Market Authorizations in the US", width=40),
title=str_wrap("Annual Number of Orphan Drug Market Authorizations in the US", width=50),
) +
scale_color_manual(name = "Color", values = c("darkred", "dodgerblue"))+
theme_minimal()

us_orphan_drugs
merge US orphan drug df with PhRMA r&d df
phrma_us_rd <- phrma_us_rd[-dim(phrma_us_rd)[1],]
us_rd_mas <- merge(phrma_us_rd,us_od_ma_freqs,by="Year")
us_rd_mas
format
us_rd_mas$Domestic.R.D. <- parse_number(us_rd_mas$Domestic.R.D.) #dollars are in millions
us_rd_mas <- clean_names(us_rd_mas)
us_rd_mas
us dataframe simplified
us_rd_mas$annual_domestic_RD_spending_bil_dollars <- us_rd_mas$domestic_r_d /1000 #convert from millions of dollars to billions
us_rd_mas_simplified <- as.data.frame(cbind(us_rd_mas$year, us_rd_mas$annual_domestic_RD_spending_bil_dollars, us_rd_mas$freq_orphan_drug_m_as, us_rd_mas$freq_new_orphan_drug_m_as))
colnames(us_rd_mas_simplified) <- c("year", "annual_domestic_RD_spending_bil_dollars", "freq_orphan_drug_mas", "freq_new_orphan_drug_mas")
us_rd_mas_simplified$area <- rep("US", dim(us_rd_mas_simplified)[1])
us_rd_mas_simplified$lagged_annual_domestic_RD_spending_bil_dollars <- lag(us_rd_mas_simplified$annual_domestic_RD_spending_bil_dollars)
us_rd_mas_simplified
eu dataframe simplified
eu_rd_mas_simplified <- as.data.frame(cbind(eu_rd_mas$year, eu_rd_mas$total_rd_bil_dollars, eu_rd_mas$freq_orphan_drug_m_as, eu_rd_mas$freq_new_orphan_drug_m_as))
colnames(eu_rd_mas_simplified) <- c("year", "annual_domestic_RD_spending_bil_dollars", "freq_orphan_drug_mas", "freq_new_orphan_drug_mas")
eu_rd_mas_simplified$area <- rep("EU", dim(eu_rd_mas_simplified)[1])
eu_rd_mas_simplified$lagged_annual_domestic_RD_spending_bil_dollars <- lag(eu_rd_mas_simplified$annual_domestic_RD_spending_bil_dollars)
eu_rd_mas_simplified
EU GDP Per Capita In Dollars
https://www.macrotrends.net/global-metrics/countries/EUU/european-union/gdp-per-capita
eu_gdp_percapita_annual <- read.csv("Original Data/GDP/european-union-gdp-per-capita-annual.csv", skip=16)
eu_gdp_percapita_annual$year <- year(as.Date(eu_gdp_percapita_annual$date))
eu_gdp_percapita_annual <- clean_names(eu_gdp_percapita_annual)
colnames(eu_gdp_percapita_annual)[2] <- "gdp_per_capita_usd"
colnames(eu_gdp_percapita_annual)[3] <- "gdp_per_capita_annual_growthrate_usd"
eu_gdp_percapita_annual
Merge GDP Per Capita, GDP Growth rate into same df
eu_rd_mas_simplified <- merge(eu_rd_mas_simplified, eu_gdp_percapita_annual[,c("year", "gdp_per_capita_annual_growthrate_usd")], by = "year", all.x = TRUE)
eu_rd_mas_simplified
US GDP Per Capita In Dollars
https://www.macrotrends.net/global-metrics/countries/USA/united-states/gdp-per-capita
us_gdp_percapita_annual <- read.csv("Original Data/GDP/united-states-gdp-per-capita-annual.csv", skip=16)
us_gdp_percapita_annual$year <- year(as.Date(us_gdp_percapita_annual$date))
us_gdp_percapita_annual <- clean_names(us_gdp_percapita_annual)
colnames(us_gdp_percapita_annual)[2] <- "gdp_per_capita_usd"
colnames(us_gdp_percapita_annual)[3] <- "gdp_per_capita_annual_growthrate_usd"
us_gdp_percapita_annual
Merge GDP Per Capita, GDP Growth rate into same df
us_rd_mas_simplified <- merge(us_rd_mas_simplified, us_gdp_percapita_annual[,c("year", "gdp_per_capita_annual_growthrate_usd")], by = "year", all.x = TRUE)
us_rd_mas_simplified
Limit dataframe to 2004 and later, make lagged R&D spending NA at
year 2004
combined_rd_ma_df <- rbind(eu_rd_mas_simplified, us_rd_mas_simplified)
combined_rd_ma_df <- combined_rd_ma_df %>%
filter(year > 2003)
combined_rd_ma_df[combined_rd_ma_df$area =="US" & combined_rd_ma_df$year==2004,]$lagged_annual_domestic_RD_spending_bil_dollars <- NA
combined_rd_ma_df
ggplot(combined_rd_ma_df, aes(x = annual_domestic_RD_spending_bil_dollars, y = freq_orphan_drug_mas, color = area)) +
geom_point() +
labs(x = "Annual Domestic RD Spending (Billions USD)",
y = "Number of Orphan Drug Market Authorizations",
color = "Area",
title=str_wrap("Orphan Drug Market Authorizations by Annual Domestic R&D Spending in the EU and US", 60)) +
scale_color_manual(name = "Color", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
theme_minimal()

ggplot(combined_rd_ma_df, aes(x = year , y = freq_orphan_drug_mas, color = area)) +
geom_line() +
labs(x = "Year",
y = str_wrap("Annual Number of Orphan Drug Market Authorizations", width=30),
color = "Area",
title=str_wrap("Annual Number of Orphan Drug Market Authorizations in the EU and US", width=40)) +
theme_minimal() +
scale_color_manual(name = "Color", values = c('US' = 'darkblue', 'EU' = '#E66100'))

ggplot(combined_rd_ma_df, aes(x = year , y = freq_new_orphan_drug_mas, color = area)) +
geom_line() +
labs(x = "Year",
y = str_wrap("Annual Number of Market Authorizations for New Orphan Drugs", width=40),
color = "Area",
title=str_wrap("Annual Number of Market Authorizations for New Orphan Drugs in the EU and US", width=40)) +
theme_minimal() +
scale_color_manual(name = "Color", values = c('US' = 'darkblue', 'EU' = '#E66100'))

ggplot(combined_rd_ma_df, aes(x = year, y = annual_domestic_RD_spending_bil_dollars, color = area)) +
geom_line() +
labs(x = "Year",
y = "Annual Domestic R&D Spending (Billions USD)",
color = "Area",
title="Annual Domestic R&D Spending in the EU and US") +
scale_color_manual(name = "Color", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
theme_minimal()

ggplot(combined_rd_ma_df, aes(x = year, y = gdp_per_capita_annual_growthrate_usd, color = area)) +
geom_line() +
labs(x = "Year",
y = "GDP Per Capita on last day of Year (USD)",
color = "Area",
title="GDP Per Capita on last day of Year in the EU and US") +
scale_color_manual(name = "Color", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
theme_minimal()

Orphan Drug Frequencies by Year by Area
hist(subset(combined_rd_ma_df, area=="US")$freq_orphan_drug_mas, main="US Frequencies of Annual Orphan Drug Market Authorizations", xlab="Number of Annual US Orphan Drug Market Authorizations")

hist(subset(combined_rd_ma_df, area=="EU")$freq_orphan_drug_mas, main="EU Frequencies of Annual Orphan Drug Market Authorizations",xlab="Number of Annual EU Orphan Drug Market Authorizations")

New Orphan Drugs Frequencies by Year by Area
hist(subset(combined_rd_ma_df, area=="US")$freq_new_orphan_drug_mas, main="US Frequencies of Annual New Orphan Drug Market Authorizations", xlab="Number of Annual US Orphan Drug Market Authorizations")

hist(subset(combined_rd_ma_df, area=="EU")$freq_new_orphan_drug_mas, main="EU Frequencies of Annual New Orphan Drug Market Authorizations",xlab="Number of Annual EU Orphan Drug Market Authorizations")

Making a years after 2004 variable
combined_rd_ma_df$yearsafter2004 <- combined_rd_ma_df$year - 2004
combined_rd_ma_df
Summmaries
R&D spending growth rates by year
rdspending <- combined_rd_ma_df[combined_rd_ma_df$area == "EU", ]$annual_domestic_RD_spending_bil_dollars
laggedrdspending <- combined_rd_ma_df[combined_rd_ma_df$area == "EU", ]$lagged_annual_domestic_RD_spending_bil_dollars
EU_annual_domestic_RD_spending_bil_dollars_growth_rate <- rdspending/laggedrdspending
EU_annual_domestic_RD_spending_bil_dollars_growth_rate
## [1] NA 1.0296598 1.1212038 0.9661782 0.9511425 1.0932108 1.0586718
## [8] 0.9953716 1.1192369 0.9830681 1.0146180 1.3017754 1.0116816 1.0219124
## [15] 0.9845789 1.0954102 1.0319511 1.0361913
rdspending <- combined_rd_ma_df[combined_rd_ma_df$area == "US", ]$annual_domestic_RD_spending_bil_dollars
laggedrdspending <- combined_rd_ma_df[combined_rd_ma_df$area == "US", ]$lagged_annual_domestic_RD_spending_bil_dollars
US_annual_domestic_RD_spending_bil_dollars_growth_rate <- rdspending/laggedrdspending
US_annual_domestic_RD_spending_bil_dollars_growth_rate
## [1] NA 1.0478253 1.0968355 1.0777352 0.9716650 0.9939530 1.1508117
## [8] 0.8939616 1.0312479 1.0769337 1.0084489 1.1809938 1.0895376 1.0636573
## [15] 1.1159483 1.0343509 1.1251628 1.0994019
rdspending_growth_rates <- data.frame("Year"=seq(2004,2021), "US"=round(US_annual_domestic_RD_spending_bil_dollars_growth_rate,2), "EU"=round(EU_annual_domestic_RD_spending_bil_dollars_growth_rate,2))
rdspending_growth_rates$US <- as.character(rdspending_growth_rates$US )
rdspending_growth_rates$EU <- as.character(rdspending_growth_rates$EU )
# make 2011 NA because 2011 values were imputed
rdspending_growth_rates[rdspending_growth_rates$Year == 2011,]$EU <- "NA"
rdspending_growth_rates[rdspending_growth_rates$Year == 2012,]$EU <- "NA"
rdspending_growth_rates
stargazer(rdspending_growth_rates, out="Final Figures Formatted/RDSpendingAnnualGrowthRates.htm" , summary=FALSE, rownames=FALSE, colnames = TRUE, digits=2, digit.separator="",column.sep.width= "15pt", title="Annual Growth Rate of Phramaceutical R&D Spending")
##
## % Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
## % Date and time: Wed, Oct 30, 2024 - 14:06:11
## \begin{table}[!htbp] \centering
## \caption{Annual Growth Rate of Phramaceutical R&D Spending}
## \label{}
## \begin{tabular}{@{\extracolsep{15pt}} ccc}
## \\[-1.8ex]\hline
## \hline \\[-1.8ex]
## Year & US & EU \\
## \hline \\[-1.8ex]
## $2004$ & & \\
## $2005$ & 1.05 & 1.03 \\
## $2006$ & 1.1 & 1.12 \\
## $2007$ & 1.08 & 0.97 \\
## $2008$ & 0.97 & 0.95 \\
## $2009$ & 0.99 & 1.09 \\
## $2010$ & 1.15 & 1.06 \\
## $2011$ & 0.89 & NA \\
## $2012$ & 1.03 & NA \\
## $2013$ & 1.08 & 0.98 \\
## $2014$ & 1.01 & 1.01 \\
## $2015$ & 1.18 & 1.3 \\
## $2016$ & 1.09 & 1.01 \\
## $2017$ & 1.06 & 1.02 \\
## $2018$ & 1.12 & 0.98 \\
## $2019$ & 1.03 & 1.1 \\
## $2020$ & 1.13 & 1.03 \\
## $2021$ & 1.1 & 1.04 \\
## \hline \\[-1.8ex]
## \end{tabular}
## \end{table}
Linear Models of Annual Domestic R&D Spending
Plot Linear Models of Annual Domestic R&D Spending
plot_linear_rd <- function(linear_model, model_number){
#Plot residuals and acf/ pacf of residuals by panel (EU/US)
combined_rd_ma_df$residuals <- resid(linear_model)
par(mfrow =c(2,4))
us_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "US",]
eu_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "EU",]
plot(us_resid_data$year, us_resid_data$residuals, xlab="Year", main="US Residuals", ylab="Residuals")
acf(us_resid_data$residuals, main="ACF of US Residuals")
pacf(us_resid_data$residuals, main="PACF of US Residuals")
qqnorm(us_resid_data$residuals, main = "QQ Plot of US Residuals")
plot(eu_resid_data$year, eu_resid_data$residuals, xlab="Year", main="EU Residuals", ylab="Residuals")
acf(eu_resid_data$residuals, main="ACF of EU Residuals")
pacf(eu_resid_data$residuals, main="PACF of EU Residuals")
qqnorm(eu_resid_data$residuals, main = "QQ Plot of EU Residuals")
mtext(glue('Linear Model {model_number} of Annual Domestic R&D Spending - Residual Plots'), side = 3, line = -1.1, outer = TRUE)
predicted <- predict(linear_model , type = "response")
ggplot(combined_rd_ma_df, aes(x = year, y = annual_domestic_RD_spending_bil_dollars, color = area, alpha="Observed")) +
geom_point() +
labs(x = "Year",
y = "Annual Domestic R&D Spending (Billions USD)",
color = "Area",
alpha = "Line Type",
title=glue('Linear Model {model_number} of Annual Domestic R&D Spending'))+
scale_color_manual(name = "Area", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
geom_line(aes(x=year, y=predicted, alpha = "Fitted"))+
scale_alpha_manual(values = c(0.5, 1))+
theme_minimal()
# https://aosmith.rbind.io/2020/07/09/ggplot2-override-aes/
#Controlling legend appearance in ggplot2 with override.aes
#July 9, 2020 · @aosmith16
#Ariel Muldoon
# https://www.tidyverse.org/blog/2024/02/ggplot2-3-5-0-legends/
# Teun van den Brand
# 2024/02/26
}
get_rd_model_metrics <- function(model, model_type){
#Get RMSE
predicted <- predict(model, type = "response")
actual <- combined_rd_ma_df$annual_domestic_RD_spending_bil_dollars
model_rmse <- rmse(actual=actual, predicted=predicted)
model_df1 <- summary(model)$fstatistic[2]
model_df2 <- summary(model)$fstatistic[3]
model_fstat <- summary(model)$fstatistic[1]
model_pval <- pf(summary(model)$fstatistic[1], summary(model)$fstatistic[2], summary(model)$fstatistic[3], lower.tail = FALSE)
# https://www.reneshbedre.com/blog/get-f-stat-p-value-from-lm.html
# Renesh Bedre
# March 26, 2023
model_adjRsq <- summary(model)$adj.r.squared
model_aic <- AIC(model)
metrics_df <- data.frame("rmse" = model_rmse, "aic"= model_aic, "df1" = model_df1, "df2" = model_df2, "fstat" = model_fstat, "pval" = model_pval, "adjrsq" = model_adjRsq)
rownames(metrics_df) <- NULL
return(metrics_df)
}
Linear Model 1 of Annual Domestic R&D Spending
mod.rd.1 <- lm(annual_domestic_RD_spending_bil_dollars ~ as.factor(area) , data=combined_rd_ma_df)
summary(mod.rd.1)
##
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area),
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.701 -7.570 -4.119 6.193 33.354
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.970 2.712 9.208 9.23e-11 ***
## as.factor(area)US 21.286 3.835 5.551 3.31e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.5 on 34 degrees of freedom
## Multiple R-squared: 0.4754, Adjusted R-squared: 0.46
## F-statistic: 30.81 on 1 and 34 DF, p-value: 3.305e-06
plot_linear_rd(mod.rd.1, "1")


#Estimate covariance matrix with Newey West
mod.rd.1.vcov <- vcovPL(mod.rd.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.1 <- coeftest(mod.rd.1, vcov = mod.rd.1.vcov)
mod.rd.nw.1
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.9701 2.4554 10.1695 7.571e-12 ***
## as.factor(area)US 21.2863 3.1408 6.7774 8.581e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.1)
## 2.5 % 97.5 %
## (Intercept) 19.98015 29.96002
## as.factor(area)US 14.90350 27.66917
#Get Metrics
mod.rd.1.metrics <- get_rd_model_metrics(mod.rd.1, "Linear")
Linear Model 2 of Annual Domestic R&D Spending
mod.rd.2 <- lm(annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + yearsafter2004, data=combined_rd_ma_df)
summary(mod.rd.2)
##
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) +
## yearsafter2004, data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.3415 -3.6925 -0.8054 1.4060 17.3602
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.9767 2.0536 4.371 0.000116 ***
## as.factor(area)US 21.2863 1.8977 11.217 8.46e-13 ***
## yearsafter2004 1.8816 0.1829 10.288 7.91e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.693 on 33 degrees of freedom
## Multiple R-squared: 0.8753, Adjusted R-squared: 0.8678
## F-statistic: 115.8 on 2 and 33 DF, p-value: 1.205e-15
plot_linear_rd(mod.rd.2, "2")


#Estimate covariance matrix with Newey West
mod.rd.2.vcov <- vcovPL(mod.rd.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.2 <- coeftest(mod.rd.2, vcov = mod.rd.2.vcov)
mod.rd.nw.2
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.97674 3.27862 2.738 0.009885 **
## as.factor(area)US 21.28634 3.18801 6.677 1.335e-07 ***
## yearsafter2004 1.88157 0.22857 8.232 1.662e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.2)
## 2.5 % 97.5 %
## (Intercept) 2.306343 15.647146
## as.factor(area)US 14.800276 27.772399
## yearsafter2004 1.416543 2.346596
#Get Metrics
mod.rd.2.metrics <- get_rd_model_metrics(mod.rd.2, "Linear")
Linear Model 3 of Annual Domestic R&D Spending
mod.rd.3 <- lm(annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + gdp_per_capita_annual_growthrate_usd + yearsafter2004, data=combined_rd_ma_df)
summary(mod.rd.3)
##
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) +
## gdp_per_capita_annual_growthrate_usd + yearsafter2004, data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.4165 -3.1174 -0.8098 2.6095 15.7651
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.1200 2.1801 3.725 0.000754 ***
## as.factor(area)US 21.2716 1.8896 11.257 1.16e-12 ***
## gdp_per_capita_annual_growthrate_usd 0.1831 0.1616 1.133 0.265549
## yearsafter2004 1.9132 0.1842 10.385 8.93e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.669 on 32 degrees of freedom
## Multiple R-squared: 0.8801, Adjusted R-squared: 0.8689
## F-statistic: 78.31 on 3 and 32 DF, p-value: 7.91e-15
plot_linear_rd(mod.rd.3, "3")


#Estimate covariance matrix with Newey West
mod.rd.3.vcov <- vcovPL(mod.rd.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.3 <- coeftest(mod.rd.3, vcov = mod.rd.3.vcov)
mod.rd.nw.3
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.12002 2.91992 2.7809 0.009008 **
## as.factor(area)US 21.27158 3.22110 6.6038 1.913e-07 ***
## gdp_per_capita_annual_growthrate_usd 0.18314 0.10027 1.8266 0.077107 .
## yearsafter2004 1.91322 0.20108 9.5147 7.547e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.3)
## 2.5 % 97.5 %
## (Intercept) 2.17234330 14.0676976
## as.factor(area)US 14.71041811 27.8327508
## gdp_per_capita_annual_growthrate_usd -0.02109303 0.3873803
## yearsafter2004 1.50363514 2.3228064
#Get Metrics
mod.rd.3.metrics <- get_rd_model_metrics(mod.rd.3, "Linear")
VIFs of preditors
vif(mod.rd.3)
## as.factor(area) gdp_per_capita_annual_growthrate_usd
## 1.000047 1.023572
## yearsafter2004
## 1.023525
Linear Model 4 of Annual Domestic R&D Spending
mod.rd.4 <- lm(annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + yearsafter2004 + as.factor(area):yearsafter2004, data=combined_rd_ma_df)
summary(mod.rd.4)
##
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) +
## yearsafter2004 + as.factor(area):yearsafter2004, data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.3946 -2.1954 0.9159 1.8342 11.3927
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.9442 1.9458 7.680 9.38e-09 ***
## as.factor(area)US 9.3514 2.7518 3.398 0.00183 **
## yearsafter2004 1.1795 0.1954 6.036 9.77e-07 ***
## as.factor(area)US:yearsafter2004 1.4041 0.2763 5.081 1.57e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.301 on 32 degrees of freedom
## Multiple R-squared: 0.931, Adjusted R-squared: 0.9245
## F-statistic: 143.9 on 3 and 32 DF, p-value: < 2.2e-16
plot_linear_rd(mod.rd.4, "4")


#Estimate covariance matrix with Newey West
mod.rd.4.vcov <- vcovPL(mod.rd.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.4 <- coeftest(mod.rd.4, vcov = mod.rd.4.vcov)
mod.rd.nw.4
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.944238 1.138132 13.1305 1.972e-14 ***
## as.factor(area)US 9.351351 2.470300 3.7855 0.0006372 ***
## yearsafter2004 1.179511 0.095676 12.3282 1.073e-13 ***
## as.factor(area)US:yearsafter2004 1.404116 0.321119 4.3726 0.0001217 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.4)
## 2.5 % 97.5 %
## (Intercept) 12.6259391 17.262536
## as.factor(area)US 4.3195141 14.383188
## yearsafter2004 0.9846264 1.374396
## as.factor(area)US:yearsafter2004 0.7500184 2.058214
#Get Metrics
mod.rd.4.metrics <- get_rd_model_metrics(mod.rd.4, "Linear")
Linear Model 5 of Annual Domestic R&D Spending
mod.rd.5 <- lm(annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + gdp_per_capita_annual_growthrate_usd + yearsafter2004 + yearsafter2004:as.factor(area), data=combined_rd_ma_df)
summary(mod.rd.5)
##
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) +
## gdp_per_capita_annual_growthrate_usd + yearsafter2004 + yearsafter2004:as.factor(area),
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.4057 -2.2988 0.3359 1.5807 10.7972
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.42340 2.11349 6.824 1.20e-07 ***
## as.factor(area)US 9.60610 2.80244 3.428 0.00174 **
## gdp_per_capita_annual_growthrate_usd 0.08339 0.12540 0.665 0.51094
## yearsafter2004 1.20930 0.20215 5.982 1.29e-06 ***
## as.factor(area)US:yearsafter2004 1.37335 0.28259 4.860 3.21e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.339 on 31 degrees of freedom
## Multiple R-squared: 0.932, Adjusted R-squared: 0.9232
## F-statistic: 106.2 on 4 and 31 DF, p-value: < 2.2e-16
plot_linear_rd(mod.rd.5, "5")


#Estimate covariance matrix with Newey West
mod.rd.5.vcov <- vcovPL(mod.rd.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.5 <- coeftest(mod.rd.5, vcov = mod.rd.5.vcov)
mod.rd.nw.5
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.423396 0.895858 16.1001 < 2.2e-16 ***
## as.factor(area)US 9.606103 2.668712 3.5995 0.0010962 **
## gdp_per_capita_annual_growthrate_usd 0.083394 0.078564 1.0615 0.2966708
## yearsafter2004 1.209304 0.073613 16.4280 < 2.2e-16 ***
## as.factor(area)US:yearsafter2004 1.373355 0.330965 4.1496 0.0002407 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.5)
## 2.5 % 97.5 %
## (Intercept) 12.59628170 16.2505110
## as.factor(area)US 4.16322798 15.0489771
## gdp_per_capita_annual_growthrate_usd -0.07683769 0.2436255
## yearsafter2004 1.05917056 1.3594380
## as.factor(area)US:yearsafter2004 0.69834823 2.0483617
#Get RMSE
predicted <- predict(mod.rd.5, type = "response")
actual <- combined_rd_ma_df$annual_domestic_RD_spending_bil_dollars
mod.rd.5.rmse <- rmse(actual=actual, predicted=predicted)
#Get Metrics
mod.rd.5.metrics <- get_rd_model_metrics(mod.rd.5, "Linear")
Linear Model 6 of Annual Domestic R&D Spending
mod.rd.6 <- lm(annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + gdp_per_capita_annual_growthrate_usd + yearsafter2004 + yearsafter2004:as.factor(area) + +I(yearsafter2004^2) + I(yearsafter2004^2):as.factor(area), data=combined_rd_ma_df)
summary(mod.rd.6)
##
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) +
## gdp_per_capita_annual_growthrate_usd + yearsafter2004 + yearsafter2004:as.factor(area) +
## +I(yearsafter2004^2) + I(yearsafter2004^2):as.factor(area),
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.2500 -0.9883 -0.3515 1.2539 5.1768
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.14473 1.63419 11.103 5.84e-12 ***
## as.factor(area)US 15.85117 1.99412 7.949 9.12e-09 ***
## gdp_per_capita_annual_growthrate_usd -0.08329 0.06975 -1.194 0.2421
## yearsafter2004 0.14465 0.42207 0.343 0.7343
## I(yearsafter2004^2) 0.05912 0.02348 2.518 0.0176 *
## as.factor(area)US:yearsafter2004 -1.09801 0.54375 -2.019 0.0528 .
## as.factor(area)US:I(yearsafter2004^2) 0.14899 0.03057 4.874 3.60e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.142 on 29 degrees of freedom
## Multiple R-squared: 0.9845, Adjusted R-squared: 0.9813
## F-statistic: 306.7 on 6 and 29 DF, p-value: < 2.2e-16
plot_linear_rd(mod.rd.6, "6")


#Estimate covariance matrix with Newey West
mod.rd.6.vcov <- vcovPL(mod.rd.6, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.6 <- coeftest(mod.rd.6, vcov = mod.rd.6.vcov)
mod.rd.nw.6
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.144732 0.916141 19.8056 < 2.2e-16
## as.factor(area)US 15.851175 2.428745 6.5265 3.799e-07
## gdp_per_capita_annual_growthrate_usd -0.083292 0.052153 -1.5971 0.1210926
## yearsafter2004 0.144646 0.283582 0.5101 0.6138653
## I(yearsafter2004^2) 0.059124 0.015743 3.7555 0.0007736
## as.factor(area)US:yearsafter2004 -1.098009 0.618239 -1.7760 0.0862235
## as.factor(area)US:I(yearsafter2004^2) 0.148991 0.034491 4.3197 0.0001669
##
## (Intercept) ***
## as.factor(area)US ***
## gdp_per_capita_annual_growthrate_usd
## yearsafter2004
## I(yearsafter2004^2) ***
## as.factor(area)US:yearsafter2004 .
## as.factor(area)US:I(yearsafter2004^2) ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.6)
## 2.5 % 97.5 %
## (Intercept) 16.27101332 20.01845100
## as.factor(area)US 10.88383349 20.81851583
## gdp_per_capita_annual_growthrate_usd -0.18995673 0.02337328
## yearsafter2004 -0.43534481 0.72463632
## I(yearsafter2004^2) 0.02692573 0.09132242
## as.factor(area)US:yearsafter2004 -2.36245040 0.16643231
## as.factor(area)US:I(yearsafter2004^2) 0.07844841 0.21953376
#Get Metrics
mod.rd.6.metrics <- get_rd_model_metrics(mod.rd.6, "Linear")
mod.rd.metrics <- rbind(mod.rd.1.metrics, mod.rd.2.metrics, mod.rd.3.metrics, mod.rd.4.metrics, mod.rd.5.metrics, mod.rd.6.metrics)
aic_values <- round(mod.rd.metrics$aic)
rmse_values <- round(mod.rd.metrics$rmse, 2)
fstat_values <- round(mod.rd.metrics$fstat, 2)
p_values <- round(mod.rd.metrics$pval, 4)
adjrsq_values <- round(mod.rd.metrics$adjrsq, 2)
stargazer(mod.rd.1, mod.rd.2, mod.rd.3, mod.rd.4, mod.rd.5, mod.rd.6,
title="Linear Models of Annual Domestic Pharmaceutical R&D Spending (Not Adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/rdmodelsNonAdjusted.htm",
covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>2</sub>)", "Years After 2004 (β<sup>̂</sup><sub>3</sub>)", "Years After 2004^{2} (β<sup>̂</sup><sub>4</sub>)", "US:Years After 2004 (β<sup>̂</sup><sub>5</sub>)", "US:Years After 2004^{2} (β<sup>̂</sup><sub>6</sub>)", "Intercept (β<sup>̂</sup><sub>0</sub>)"),
dep.var.labels=c("Annual Domestic Pharmaceutical R&D Spending (Billion USD)"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
##
## <table style="text-align:center"><caption><strong>Linear Models of Annual Domestic Pharmaceutical R&D Spending (Not Adjusted for Error Structure)</strong></caption>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="6"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="6" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="6">Annual Domestic Pharmaceutical R&D Spending (Billion USD)</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td><td>Model 6</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>21.286<sup>***</sup></td><td>21.286<sup>***</sup></td><td>21.272<sup>***</sup></td><td>9.351<sup>***</sup></td><td>9.606<sup>***</sup></td><td>15.851<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(3.835)</td><td>(1.898)</td><td>(1.890)</td><td>(2.752)</td><td>(2.802)</td><td>(1.994)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>2</sub>)</td><td></td><td></td><td>0.183</td><td></td><td>0.083</td><td>-0.083</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.162)</td><td></td><td>(0.125)</td><td>(0.070)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>1.882<sup>***</sup></td><td>1.913<sup>***</sup></td><td>1.180<sup>***</sup></td><td>1.209<sup>***</sup></td><td>0.145</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.183)</td><td>(0.184)</td><td>(0.195)</td><td>(0.202)</td><td>(0.422)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004<sup>2</sup> (β<sup>̂</sup><sub>4</sub>)</td><td></td><td></td><td></td><td></td><td></td><td>0.059<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td>(0.023)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">US:Years After 2004 (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td></td><td>1.404<sup>***</sup></td><td>1.373<sup>***</sup></td><td>-1.098<sup>*</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.276)</td><td>(0.283)</td><td>(0.544)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">US:Years After 2004<sup>2</sup> (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td></td><td></td><td>0.149<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td>(0.031)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>24.970<sup>***</sup></td><td>8.977<sup>***</sup></td><td>8.120<sup>***</sup></td><td>14.944<sup>***</sup></td><td>14.423<sup>***</sup></td><td>18.145<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(2.712)</td><td>(2.054)</td><td>(2.180)</td><td>(1.946)</td><td>(2.113)</td><td>(1.634)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>282</td><td>232</td><td>233</td><td>213</td><td>214</td><td>165</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>11.18</td><td>5.45</td><td>5.34</td><td>4.06</td><td>4.03</td><td>1.92</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.46</td><td>0.87</td><td>0.87</td><td>0.92</td><td>0.92</td><td>0.98</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>30.81</td><td>115.83</td><td>78.31</td><td>143.9</td><td>106.16</td><td>306.67</td></tr>
## <tr><td style="text-align:left">p-value</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="6" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.rd.nw.1
m2 <- mod.rd.nw.2
m3 <- mod.rd.nw.3
m4 <- mod.rd.nw.4
m5 <- mod.rd.nw.5
m6 <- mod.rd.nw.6
stargazer(m1, m2, m3, m4, m5, m6, title="Newey-West Adjusted Linear Models of Annual Domestic Pharmaceutical R&D Spending", align=TRUE, type = "html", out="Final Models Formatted/rdmodels.htm",
covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>2</sub>)", "Years After 2004 (β<sup>̂</sup><sub>3</sub>)", "Years After 2004^{2} (β<sup>̂</sup><sub>4</sub>)", "US:Years After 2004 (β<sup>̂</sup><sub>5</sub>)", "US:Years After 2004^{2} (β<sup>̂</sup><sub>6</sub>)", "Intercept (β<sup>̂</sup><sub>0</sub>)"),
dep.var.labels=c("Annual Domestic Pharmaceutical R&D Spending (Billion USD)"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6"),
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
##
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Linear Models of Annual Domestic Pharmaceutical R&D Spending</strong></caption>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="6"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="6" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="6">Annual Domestic Pharmaceutical R&D Spending (Billion USD)</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td><td>Model 6</td></tr>
## <tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td><td>(4)</td><td>(5)</td><td>(6)</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>21.286<sup>***</sup></td><td>21.286<sup>***</sup></td><td>21.272<sup>***</sup></td><td>9.351<sup>***</sup></td><td>9.606<sup>***</sup></td><td>15.851<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(3.141)</td><td>(3.188)</td><td>(3.221)</td><td>(2.470)</td><td>(2.669)</td><td>(2.429)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>2</sub>)</td><td></td><td></td><td>0.183<sup>*</sup></td><td></td><td>0.083</td><td>-0.083</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.100)</td><td></td><td>(0.079)</td><td>(0.052)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>1.882<sup>***</sup></td><td>1.913<sup>***</sup></td><td>1.180<sup>***</sup></td><td>1.209<sup>***</sup></td><td>0.145</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.229)</td><td>(0.201)</td><td>(0.096)</td><td>(0.074)</td><td>(0.284)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004<sup>2</sup> (β<sup>̂</sup><sub>4</sub>)</td><td></td><td></td><td></td><td></td><td></td><td>0.059<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td>(0.016)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">US:Years After 2004 (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td></td><td>1.404<sup>***</sup></td><td>1.373<sup>***</sup></td><td>-1.098<sup>*</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.321)</td><td>(0.331)</td><td>(0.618)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">US:Years After 2004<sup>2</sup> (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td></td><td></td><td>0.149<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td>(0.034)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>24.970<sup>***</sup></td><td>8.977<sup>***</sup></td><td>8.120<sup>***</sup></td><td>14.944<sup>***</sup></td><td>14.423<sup>***</sup></td><td>18.145<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(2.455)</td><td>(3.279)</td><td>(2.920)</td><td>(1.138)</td><td>(0.896)</td><td>(0.916)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>282</td><td>232</td><td>233</td><td>213</td><td>214</td><td>165</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>11.18</td><td>5.45</td><td>5.34</td><td>4.06</td><td>4.03</td><td>1.92</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.46</td><td>0.87</td><td>0.87</td><td>0.92</td><td>0.92</td><td>0.98</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>30.81</td><td>115.83</td><td>78.31</td><td>143.9</td><td>106.16</td><td>306.67</td></tr>
## <tr><td style="text-align:left">p-value</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="6" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
Linear Models of Number of Orphan Drug Market Authorizations
Plot Models of Number of Orphan Drug Market Authorizations (Linear or
Poisson Models)
plot_lin_pois_od <- function(model, model_number, model_type){
#Plot residuals and acf/ pacf of residuals by panel (EU/US)
combined_rd_ma_df$residuals <- resid(model)
par(mfrow =c(2,4))
us_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "US",]
eu_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "EU",]
plot(us_resid_data$year, us_resid_data$residuals, xlab="Year", main="US Residuals", ylab="Residuals")
acf(us_resid_data$residuals, main="ACF of US Residuals")
pacf(us_resid_data$residuals, main="PACF of US Residuals")
qqnorm(us_resid_data$residuals, main = "QQ Plot of US Residuals")
plot(eu_resid_data$year, eu_resid_data$residuals, xlab="Year", main="EU Residuals", ylab="Residuals")
acf(eu_resid_data$residuals, main="ACF of EU Residuals")
pacf(eu_resid_data$residuals, main="PACF of EU Residuals")
qqnorm(eu_resid_data$residuals, main = "QQ Plot of EU Residuals")
mtext(glue('{model_type} Model {model_number} of Annual Orphan Drug MAs - Residual Plots'), side = 3, line = -1.1, outer = TRUE)
predicted <- predict(model , type = "response")
ggplot(combined_rd_ma_df, aes(x = year, y = freq_orphan_drug_mas, color = area, alpha="Observed")) +
geom_point() +
labs(x = "Year",
y = str_wrap("Number of Annual Orphan Drug Market Authorizations", width = 40),
color = "Area",
alpha = "Line Type",
title=str_wrap(glue("{model_type} Model {model_number} of Annual Orphan Drug Market Authorizations"), width = 60))+
scale_color_manual(name = "Color", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
geom_line(aes(x=year, y=predicted, alpha = "Fitted"))+
scale_alpha_manual(values = c(0.5, 1))+
theme_minimal()
# https://aosmith.rbind.io/2020/07/09/ggplot2-override-aes/
#Controlling legend appearance in ggplot2 with override.aes
#July 9, 2020 · @aosmith16
# https://www.tidyverse.org/blog/2024/02/ggplot2-3-5-0-legends/
#Teun van den Brand
#2024/02/26
}
Get Metrics of Models of Number of Orphan Drug Market Authorizations
(Linear or Poisson Models)
get_od_model_metrics <- function(model, model_type){
#Get RMSE
predicted <- predict(model, type = "response")
actual <- combined_rd_ma_df$freq_orphan_drug_mas
model_rmse <- rmse(actual=actual, predicted=predicted)
model_df1 <- NA
model_df2 <- NA
model_adjRsq <- NA
model_fstat <- NA
model_pval <- NA
model_loglik <- NA
if(model_type == "Linear"){
model_df1 <- summary(model)$fstatistic[2]
model_df2 <- summary(model)$fstatistic[3]
model_fstat <- summary(model)$fstatistic[1]
model_pval <- pf(summary(model)$fstatistic[1], summary(model)$fstatistic[2], summary(model)$fstatistic[3], lower.tail = FALSE)
# https://www.reneshbedre.com/blog/get-f-stat-p-value-from-lm.html
# Renesh Bedre
# March 26, 2023
model_adjRsq <- summary(model)$adj.r.squared
}
if(model_type == "Poisson"){
model_loglik <- logLik(model)
}
model_aic <- AIC(model)
metrics_df <- data.frame("rmse" = model_rmse, "aic"= model_aic, "df1" = model_df1, "df2" = model_df2, "fstat" = model_fstat, "pval" = model_pval, "adjrsq" = model_adjRsq, "LogLik" = model_loglik)
rownames(metrics_df) <- NULL
return(metrics_df)
}
Linear Model 1 of Num of Annual Orphan Drug MAs
mod.od.1 <- lm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 , data=combined_rd_ma_df)
summary(mod.od.1)
##
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004,
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.210 -11.451 -2.210 9.258 34.673
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -13.1974 5.5594 -2.374 0.0236 *
## as.factor(area)US 32.9444 5.1374 6.413 2.88e-07 ***
## yearsafter2004 2.8271 0.4951 5.710 2.27e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.41 on 33 degrees of freedom
## Multiple R-squared: 0.6908, Adjusted R-squared: 0.6721
## F-statistic: 36.86 on 2 and 33 DF, p-value: 3.881e-09
plot_lin_pois_od(mod.od.1, "1", "Linear")


#Estimate covariance matrix with Newey West
mod.od.1.vcov <- vcovPL(mod.od.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.od.nw.1 <-coeftest(mod.od.1, vcov = mod.od.1.vcov)
mod.od.nw.1
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -13.19737 7.38616 -1.7868 0.083165 .
## as.factor(area)US 32.94444 9.89037 3.3310 0.002141 **
## yearsafter2004 2.82714 0.37832 7.4728 1.369e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Get RMSE
predicted <- predict(mod.od.1, type = "response")
actual <- combined_rd_ma_df$freq_orphan_drug_mas
mod.od.1.rmse <- rmse(actual=actual, predicted=predicted)
confint(mod.od.nw.1)
## 2.5 % 97.5 %
## (Intercept) -28.224621 1.829884
## as.factor(area)US 12.822344 53.066545
## yearsafter2004 2.057436 3.596847
mod.od.1.metrics <- get_od_model_metrics(mod.od.1, "Linear")
Linear Model 2 of Num of Annual Orphan Drug MAs
mod.od.2 <- lm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.od.2)
##
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd,
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.8357 -4.5679 0.6621 3.4228 23.2087
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -32.2305 4.7330 -6.810 1.25e-07
## as.factor(area)US -9.8002 7.6308 -1.284 0.209
## yearsafter2004 -0.9122 0.6983 -1.306 0.201
## annual_domestic_RD_spending_bil_dollars 2.0073 0.3205 6.262 5.83e-07
## gdp_per_capita_annual_growthrate_usd 0.2169 0.2989 0.726 0.474
##
## (Intercept) ***
## as.factor(area)US
## yearsafter2004
## annual_domestic_RD_spending_bil_dollars ***
## gdp_per_capita_annual_growthrate_usd
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.28 on 31 degrees of freedom
## Multiple R-squared: 0.8708, Adjusted R-squared: 0.8541
## F-statistic: 52.24 on 4 and 31 DF, p-value: 2.429e-13
plot_lin_pois_od(mod.od.2, "2", "Linear")


#Estimate covariance matrix with Newey West
mod.od.2.vcov <- vcovPL(mod.od.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.od.nw.2 <-coeftest(mod.od.2, vcov = mod.od.2.vcov)
mod.od.nw.2
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -32.23048 5.15161 -6.2564 5.928e-07
## as.factor(area)US -9.80022 8.02606 -1.2211 0.2313
## yearsafter2004 -0.91218 0.60024 -1.5197 0.1387
## annual_domestic_RD_spending_bil_dollars 2.00726 0.31897 6.2928 5.348e-07
## gdp_per_capita_annual_growthrate_usd 0.21686 0.24590 0.8819 0.3846
##
## (Intercept) ***
## as.factor(area)US
## yearsafter2004
## annual_domestic_RD_spending_bil_dollars ***
## gdp_per_capita_annual_growthrate_usd
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Get RMSE
predicted <- predict(mod.od.2, type = "response")
actual <- combined_rd_ma_df$freq_orphan_drug_mas
mod.od.2.rmse <- rmse(actual=actual, predicted=predicted)
confint(mod.od.nw.2)
## 2.5 % 97.5 %
## (Intercept) -42.737264 -21.7237059
## as.factor(area)US -26.169472 6.5690240
## yearsafter2004 -2.136377 0.3120218
## annual_domestic_RD_spending_bil_dollars 1.356706 2.6578126
## gdp_per_capita_annual_growthrate_usd -0.284644 0.7183715
mod.od.2.metrics <- get_od_model_metrics(mod.od.2, "Linear")
VIFs of predictors
vif(mod.od.2)
## as.factor(area) yearsafter2004
## 4.960278 4.472835
## annual_domestic_RD_spending_bil_dollars gdp_per_capita_annual_growthrate_usd
## 8.341983 1.064648
cor(cbind(yearsafter2004=combined_rd_ma_df$yearsafter2004, annual_domestic_RD_spending_bil_dollars=combined_rd_ma_df$annual_domestic_RD_spending_bil_dollars, gdp_per_capita_annual_growthrate_usd=combined_rd_ma_df$gdp_per_capita_annual_growthrate_usd))
## yearsafter2004
## yearsafter2004 1.0000000
## annual_domestic_RD_spending_bil_dollars 0.6323924
## gdp_per_capita_annual_growthrate_usd -0.1516009
## annual_domestic_RD_spending_bil_dollars
## yearsafter2004 0.63239241
## annual_domestic_RD_spending_bil_dollars 1.00000000
## gdp_per_capita_annual_growthrate_usd -0.02262092
## gdp_per_capita_annual_growthrate_usd
## yearsafter2004 -0.15160094
## annual_domestic_RD_spending_bil_dollars -0.02262092
## gdp_per_capita_annual_growthrate_usd 1.00000000
Linear Model 3 of Num of Annual Orphan Drug MAs
mod.od.3 <- lm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004 , data=combined_rd_ma_df)
summary(mod.od.3)
##
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004,
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.1119 -4.6375 -0.4147 5.1218 21.9930
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.1015 4.7330 0.867 0.393
## as.factor(area)US -4.1771 6.2759 -0.666 0.511
## yearsafter2004 0.6910 0.4527 1.526 0.137
## gdp_per_capita_annual_growthrate_usd 0.2675 0.2808 0.952 0.348
## as.factor(area)US:yearsafter2004 4.3647 0.6328 6.897 9.85e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.717 on 31 degrees of freedom
## Multiple R-squared: 0.8845, Adjusted R-squared: 0.8696
## F-statistic: 59.38 on 4 and 31 DF, p-value: 4.316e-14
plot_lin_pois_od(mod.od.3, "3", "Linear")


#Estimate covariance matrix with Newey West
mod.od.3.vcov <- vcovPL(mod.od.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.od.nw.3 <-coeftest(mod.od.3, vcov = mod.od.3.vcov)
mod.od.nw.3
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.10148 1.59477 2.5718 0.0151325 *
## as.factor(area)US -4.17711 8.02490 -0.5205 0.6064004
## yearsafter2004 0.69101 0.16355 4.2251 0.0001947 ***
## gdp_per_capita_annual_growthrate_usd 0.26746 0.15363 1.7409 0.0916193 .
## as.factor(area)US:yearsafter2004 4.36471 0.76468 5.7079 2.824e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Get RMSE
predicted <- predict(mod.od.3, type = "response")
actual <- combined_rd_ma_df$freq_orphan_drug_mas
mod.od.3.rmse <- rmse(actual=actual, predicted=predicted)
confint(mod.od.nw.3)
## 2.5 % 97.5 %
## (Intercept) 0.84892641 7.3540291
## as.factor(area)US -20.54400474 12.1897872
## yearsafter2004 0.35744757 1.0245755
## gdp_per_capita_annual_growthrate_usd -0.04587725 0.5808023
## as.factor(area)US:yearsafter2004 2.80512894 5.9242846
mod.od.3.metrics <- get_od_model_metrics(mod.od.3, "Linear")
Linear Model 4 of Num of Annual Orphan Drug MAs
mod.od.4 <- lm(freq_orphan_drug_mas ~ as.factor(area) + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd
+ as.factor(area):annual_domestic_RD_spending_bil_dollars, data=combined_rd_ma_df)
summary(mod.od.4)
##
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + annual_domestic_RD_spending_bil_dollars +
## gdp_per_capita_annual_growthrate_usd + as.factor(area):annual_domestic_RD_spending_bil_dollars,
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.4436 -3.1015 -0.0441 2.4754 20.6074
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -2.5320 8.8098
## as.factor(area)US -39.7165 11.1343
## annual_domestic_RD_spending_bil_dollars 0.5204 0.3345
## gdp_per_capita_annual_growthrate_usd 0.1157 0.2550
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 1.3312 0.3656
## t value Pr(>|t|)
## (Intercept) -0.287 0.77571
## as.factor(area)US -3.567 0.00120 **
## annual_domestic_RD_spending_bil_dollars 1.556 0.12990
## gdp_per_capita_annual_growthrate_usd 0.454 0.65300
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 3.641 0.00098 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.836 on 31 degrees of freedom
## Multiple R-squared: 0.9045, Adjusted R-squared: 0.8922
## F-statistic: 73.42 on 4 and 31 DF, p-value: 2.318e-15
plot_lin_pois_od(mod.od.4, "4", "Linear")


#Estimate covariance matrix with Newey West
mod.od.4.vcov <- vcovPL(mod.od.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.od.nw.4 <-coeftest(mod.od.4, vcov = mod.od.4.vcov)
mod.od.nw.4
##
## t test of coefficients:
##
## Estimate Std. Error
## (Intercept) -2.53198 3.64040
## as.factor(area)US -39.71653 7.30819
## annual_domestic_RD_spending_bil_dollars 0.52038 0.13654
## gdp_per_capita_annual_growthrate_usd 0.11574 0.16107
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 1.33116 0.15781
## t value Pr(>|t|)
## (Intercept) -0.6955 0.4919097
## as.factor(area)US -5.4345 6.177e-06 ***
## annual_domestic_RD_spending_bil_dollars 3.8111 0.0006158 ***
## gdp_per_capita_annual_growthrate_usd 0.7186 0.4777807
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 8.4354 1.578e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Get RMSE
predicted <- predict(mod.od.4, type = "response")
actual <- combined_rd_ma_df$freq_orphan_drug_mas
mod.od.4.rmse <- rmse(actual=actual, predicted=predicted)
confint(mod.od.nw.4)
## 2.5 %
## (Intercept) -9.9566323
## as.factor(area)US -54.6216884
## annual_domestic_RD_spending_bil_dollars 0.2418948
## gdp_per_capita_annual_growthrate_usd -0.2127660
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 1.0093119
## 97.5 %
## (Intercept) 4.8926710
## as.factor(area)US -24.8113739
## annual_domestic_RD_spending_bil_dollars 0.7988630
## gdp_per_capita_annual_growthrate_usd 0.4442535
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 1.6530078
mod.od.4.metrics <- get_od_model_metrics(mod.od.4, "Linear")
Linear Model 5 of Num of Annual Orphan Drug MAs
mod.od.5 <- lm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004 + as.factor(area):annual_domestic_RD_spending_bil_dollars , data=combined_rd_ma_df)
summary(mod.od.5)
##
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd +
## as.factor(area):yearsafter2004 + as.factor(area):annual_domestic_RD_spending_bil_dollars,
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.171 -4.071 -1.797 4.741 20.032
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.5217 17.7548
## as.factor(area)US -29.5076 20.3273
## yearsafter2004 0.3260 1.4230
## annual_domestic_RD_spending_bil_dollars 0.2801 1.1560
## gdp_per_capita_annual_growthrate_usd 0.1704 0.2511
## as.factor(area)US:yearsafter2004 1.6236 1.7585
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.9226 1.2147
## t value Pr(>|t|)
## (Intercept) 0.029 0.977
## as.factor(area)US -1.452 0.157
## yearsafter2004 0.229 0.820
## annual_domestic_RD_spending_bil_dollars 0.242 0.810
## gdp_per_capita_annual_growthrate_usd 0.679 0.503
## as.factor(area)US:yearsafter2004 0.923 0.363
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.760 0.454
##
## Residual standard error: 8.627 on 29 degrees of freedom
## Multiple R-squared: 0.9149, Adjusted R-squared: 0.8973
## F-statistic: 51.94 on 6 and 29 DF, p-value: 3.319e-14
plot_lin_pois_od(mod.od.5, "5", "Linear")


#Estimate covariance matrix with Newey West
mod.od.5.vcov <- vcovPL(mod.od.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.od.nw.5 <- coeftest(mod.od.5, vcov = mod.od.5.vcov)
mod.od.nw.5
##
## t test of coefficients:
##
## Estimate Std. Error
## (Intercept) 0.52172 6.66568
## as.factor(area)US -29.50760 6.38127
## yearsafter2004 0.32598 0.55261
## annual_domestic_RD_spending_bil_dollars 0.28009 0.43516
## gdp_per_capita_annual_growthrate_usd 0.17044 0.16030
## as.factor(area)US:yearsafter2004 1.62358 1.06892
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.92259 0.44845
## t value Pr(>|t|)
## (Intercept) 0.0783 0.93815
## as.factor(area)US -4.6241 7.203e-05 ***
## yearsafter2004 0.5899 0.55983
## annual_domestic_RD_spending_bil_dollars 0.6436 0.52486
## gdp_per_capita_annual_growthrate_usd 1.0633 0.29642
## as.factor(area)US:yearsafter2004 1.5189 0.13962
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 2.0573 0.04875 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Get RMSE
predicted <- predict(mod.od.5, type = "response")
actual <- combined_rd_ma_df$freq_orphan_drug_mas
mod.od.5.rmse <- rmse(actual=actual, predicted=predicted)
confint(mod.od.nw.5)
## 2.5 %
## (Intercept) -13.111122277
## as.factor(area)US -42.558750363
## yearsafter2004 -0.804238887
## annual_domestic_RD_spending_bil_dollars -0.609907728
## gdp_per_capita_annual_growthrate_usd -0.157398553
## as.factor(area)US:yearsafter2004 -0.562604191
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.005410144
## 97.5 %
## (Intercept) 14.1545525
## as.factor(area)US -16.4564446
## yearsafter2004 1.4562082
## annual_domestic_RD_spending_bil_dollars 1.1700830
## gdp_per_capita_annual_growthrate_usd 0.4982868
## as.factor(area)US:yearsafter2004 3.8097712
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 1.8397741
mod.od.5.metrics <- get_od_model_metrics(mod.od.5, "Linear")
mod.od.metrics <- rbind(mod.od.1.metrics, mod.od.2.metrics, mod.od.3.metrics, mod.od.4.metrics, mod.od.5.metrics)
aic_values <- round(mod.od.metrics$aic)
rmse_values <- round(mod.od.metrics$rmse, 2)
fstat_values <- round(mod.od.metrics$fstat, 2)
p_values <- round(mod.od.metrics$pval, 4)
adjrsq_values <- round(mod.od.metrics$adjrsq, 2)
stargazer(mod.od.1, mod.od.2, mod.od.3, mod.od.4, mod.od.5, title="Linear Models of Orphan Drug Market Authorizations (Not Adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/orphandrugMAmodelsNonAdjusted.htm",
covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)", "Years After 2004 (β<sup>̂</sup><sub>2</sub>)", "Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)", "Years After 2004:US (β<sup>̂</sup><sub>5</sub>)", "Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)" ,"Intercept (β<sup>̂</sup><sub>0</sub>)"),
dep.var.labels=c("Annual Number of Market Authorized Orphan Drugs"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
##
## <table style="text-align:center"><caption><strong>Linear Models of Orphan Drug Market Authorizations (Not Adjusted for Error Structure)</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorized Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>32.944<sup>***</sup></td><td>-9.800</td><td>-4.177</td><td>-39.717<sup>***</sup></td><td>-29.508</td></tr>
## <tr><td style="text-align:left"></td><td>(5.137)</td><td>(7.631)</td><td>(6.276)</td><td>(11.134)</td><td>(20.327)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>2</sub>)</td><td>2.827<sup>***</sup></td><td>-0.912</td><td>0.691</td><td></td><td>0.326</td></tr>
## <tr><td style="text-align:left"></td><td>(0.495)</td><td>(0.698)</td><td>(0.453)</td><td></td><td>(1.423)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>2.007<sup>***</sup></td><td></td><td>0.520</td><td>0.280</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.321)</td><td></td><td>(0.334)</td><td>(1.156)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)</td><td></td><td>0.217</td><td>0.267</td><td>0.116</td><td>0.170</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.299)</td><td>(0.281)</td><td>(0.255)</td><td>(0.251)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td>4.365<sup>***</sup></td><td></td><td>1.624</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.633)</td><td></td><td>(1.758)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td>1.331<sup>***</sup></td><td>0.923</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.366)</td><td>(1.215)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>-13.197<sup>**</sup></td><td>-32.230<sup>***</sup></td><td>4.101</td><td>-2.532</td><td>0.522</td></tr>
## <tr><td style="text-align:left"></td><td>(5.559)</td><td>(4.733)</td><td>(4.733)</td><td>(8.810)</td><td>(17.755)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>304</td><td>277</td><td>272</td><td>266</td><td>266</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>14.76</td><td>9.54</td><td>9.02</td><td>8.2</td><td>7.74</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.67</td><td>0.85</td><td>0.87</td><td>0.89</td><td>0.9</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>36.86</td><td>52.24</td><td>59.38</td><td>73.42</td><td>51.94</td></tr>
## <tr><td style="text-align:left">p-value</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.od.nw.1
m2 <- mod.od.nw.2
m3 <- mod.od.nw.3
m4 <- mod.od.nw.4
m5 <- mod.od.nw.5
stargazer(m1,m2,m3,m4,m5, title="Newey-West Adjusted Linear Models of Orphan Drug Market Authorizations", align=TRUE, type = "html", out="Final Models Formatted/orphandrugMAmodels.htm",
covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)", "Years After 2004 (β<sup>̂</sup><sub>2</sub>)", "Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)", "Years After 2004:US (β<sup>̂</sup><sub>5</sub>)", "Annual Domestic R&D Spending (Billions USD):US (β<sub>6</sub>)" ,"Intercept (β<sup>̂</sup><sub>0</sub>)"),
dep.var.labels=c("Annual Number of Market Authorized Orphan Drugs"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
##
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Linear Models of Orphan Drug Market Authorizations</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorized Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>32.944<sup>***</sup></td><td>-9.800</td><td>-4.177</td><td>-39.717<sup>***</sup></td><td>-29.508<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(9.890)</td><td>(8.026)</td><td>(8.025)</td><td>(7.308)</td><td>(6.381)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>2</sub>)</td><td>2.827<sup>***</sup></td><td>-0.912</td><td>0.691<sup>***</sup></td><td></td><td>0.326</td></tr>
## <tr><td style="text-align:left"></td><td>(0.378)</td><td>(0.600)</td><td>(0.164)</td><td></td><td>(0.553)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>2.007<sup>***</sup></td><td></td><td>0.520<sup>***</sup></td><td>0.280</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.319)</td><td></td><td>(0.137)</td><td>(0.435)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)</td><td></td><td>0.217</td><td>0.267<sup>*</sup></td><td>0.116</td><td>0.170</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.246)</td><td>(0.154)</td><td>(0.161)</td><td>(0.160)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td>4.365<sup>***</sup></td><td></td><td>1.624</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.765)</td><td></td><td>(1.069)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD):US (β<sub>6</sub>)</td><td></td><td></td><td></td><td>1.331<sup>***</sup></td><td>0.923<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.158)</td><td>(0.448)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>-13.197<sup>*</sup></td><td>-32.230<sup>***</sup></td><td>4.101<sup>**</sup></td><td>-2.532</td><td>0.522</td></tr>
## <tr><td style="text-align:left"></td><td>(7.386)</td><td>(5.152)</td><td>(1.595)</td><td>(3.640)</td><td>(6.666)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>304</td><td>277</td><td>272</td><td>266</td><td>266</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>14.76</td><td>9.54</td><td>9.02</td><td>8.2</td><td>7.74</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.67</td><td>0.85</td><td>0.87</td><td>0.89</td><td>0.9</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>36.86</td><td>52.24</td><td>59.38</td><td>73.42</td><td>51.94</td></tr>
## <tr><td style="text-align:left">p-value</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
Poisson Models of Number of Orphan Drug Market Authorization
Models
Poisson Model 1 of Num of Annual Orphan Drug MAs
mod.pois.od.1 <- glm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.od.1)
##
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004,
## family = poisson(link = "log"), data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.287989 0.104548 12.32 <2e-16 ***
## as.factor(area)US 1.396499 0.079982 17.46 <2e-16 ***
## yearsafter2004 0.110161 0.006749 16.32 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 759.207 on 35 degrees of freedom
## Residual deviance: 83.399 on 33 degrees of freedom
## AIC: 260.65
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.1, "1", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.od.1.vcov <- vcovPL(mod.pois.od.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.1 <- coeftest(mod.pois.od.1, vcov = mod.pois.od.1.vcov)
mod.pois.od.nw.1
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.287989 0.202155 6.3713 1.875e-10 ***
## as.factor(area)US 1.396499 0.152009 9.1869 < 2.2e-16 ***
## yearsafter2004 0.110161 0.011615 9.4841 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.1)
## 2.5 % 97.5 %
## (Intercept) 0.89177186 1.6842066
## as.factor(area)US 1.09856577 1.6944313
## yearsafter2004 0.08739558 0.1329269
mod.pois.od.1.metrics <- get_od_model_metrics(mod.pois.od.1, "Poisson")
Poisson Model 2 of Num of Annual Orphan Drug MAs
mod.pois.od.2 <- glm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.od.2)
##
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd,
## family = poisson(link = "log"), data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.302515 0.102680 12.685 < 2e-16
## as.factor(area)US 1.090152 0.156723 6.956 3.50e-12
## yearsafter2004 0.077325 0.015750 4.910 9.13e-07
## annual_domestic_RD_spending_bil_dollars 0.011783 0.005311 2.219 0.0265
## gdp_per_capita_annual_growthrate_usd 0.006178 0.007158 0.863 0.3881
##
## (Intercept) ***
## as.factor(area)US ***
## yearsafter2004 ***
## annual_domestic_RD_spending_bil_dollars *
## gdp_per_capita_annual_growthrate_usd
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 759.207 on 35 degrees of freedom
## Residual deviance: 76.552 on 31 degrees of freedom
## AIC: 257.81
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.2, "2", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.od.2.vcov <- vcovPL(mod.pois.od.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.2 <- coeftest(mod.pois.od.2, vcov = mod.pois.od.2.vcov)
mod.pois.od.nw.2
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.3025153 0.1499779 8.6847 < 2.2e-16
## as.factor(area)US 1.0901521 0.2178398 5.0044 5.604e-07
## yearsafter2004 0.0773254 0.0195387 3.9576 7.572e-05
## annual_domestic_RD_spending_bil_dollars 0.0117827 0.0053372 2.2077 0.02727
## gdp_per_capita_annual_growthrate_usd 0.0061781 0.0129818 0.4759 0.63414
##
## (Intercept) ***
## as.factor(area)US ***
## yearsafter2004 ***
## annual_domestic_RD_spending_bil_dollars *
## gdp_per_capita_annual_growthrate_usd
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.2)
## 2.5 % 97.5 %
## (Intercept) 1.008564107 1.59646648
## as.factor(area)US 0.663193984 1.51711031
## yearsafter2004 0.039030329 0.11562056
## annual_domestic_RD_spending_bil_dollars 0.001321983 0.02224351
## gdp_per_capita_annual_growthrate_usd -0.019265783 0.03162206
mod.pois.od.2.metrics <- get_od_model_metrics(mod.pois.od.2, "Poisson")
Poisson Model 3 of Num of Annual Orphan Drug MAs
mod.pois.od.3 <- glm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.od.3)
##
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004,
## family = poisson(link = "log"), data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.83366 0.16093 11.394 < 2e-16 ***
## as.factor(area)US 0.66882 0.18556 3.604 0.000313 ***
## yearsafter2004 0.05711 0.01406 4.061 4.88e-05 ***
## gdp_per_capita_annual_growthrate_usd 0.00669 0.00700 0.956 0.339238
## as.factor(area)US:yearsafter2004 0.06688 0.01616 4.139 3.48e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 759.207 on 35 degrees of freedom
## Residual deviance: 64.577 on 31 degrees of freedom
## AIC: 245.83
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.3, "3", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.od.3.vcov <- vcovPL(mod.pois.od.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.3 <- coeftest(mod.pois.od.3, vcov = mod.pois.od.3.vcov)
mod.pois.od.nw.3
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.8336595 0.1868493 9.8136 < 2.2e-16 ***
## as.factor(area)US 0.6688196 0.1924589 3.4751 0.0005106 ***
## yearsafter2004 0.0571056 0.0154020 3.7077 0.0002092 ***
## gdp_per_capita_annual_growthrate_usd 0.0066898 0.0109286 0.6121 0.5404476
## as.factor(area)US:yearsafter2004 0.0668829 0.0152407 4.3884 1.142e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.3)
## 2.5 % 97.5 %
## (Intercept) 1.46744163 2.19987736
## as.factor(area)US 0.29160715 1.04603202
## yearsafter2004 0.02691824 0.08729288
## gdp_per_capita_annual_growthrate_usd -0.01472983 0.02810937
## as.factor(area)US:yearsafter2004 0.03701170 0.09675405
mod.pois.od.3.metrics <- get_od_model_metrics(mod.pois.od.3, "Poisson")
Poisson Model 4 of Num of Annual Orphan Drug MAs
mod.pois.od.4 <- glm(freq_orphan_drug_mas ~ as.factor(area) + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd + as.factor(area):annual_domestic_RD_spending_bil_dollars, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.od.4)
##
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + annual_domestic_RD_spending_bil_dollars +
## gdp_per_capita_annual_growthrate_usd + as.factor(area):annual_domestic_RD_spending_bil_dollars,
## family = poisson(link = "log"), data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 1.2582327 0.3072993
## as.factor(area)US 0.7297465 0.3311064
## annual_domestic_RD_spending_bil_dollars 0.0435081 0.0110544
## gdp_per_capita_annual_growthrate_usd -0.0005021 0.0067903
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.0079991 0.0113163
## z value Pr(>|z|)
## (Intercept) 4.094 4.23e-05 ***
## as.factor(area)US 2.204 0.0275 *
## annual_domestic_RD_spending_bil_dollars 3.936 8.29e-05 ***
## gdp_per_capita_annual_growthrate_usd -0.074 0.9411
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.707 0.4797
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 759.21 on 35 degrees of freedom
## Residual deviance: 100.64 on 31 degrees of freedom
## AIC: 281.89
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.4, "4", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.od.4.vcov <- vcovPL(mod.pois.od.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.4 <- coeftest(mod.pois.od.4, vcov = mod.pois.od.4.vcov)
mod.pois.od.nw.4
##
## z test of coefficients:
##
## Estimate
## (Intercept) 1.25823275
## as.factor(area)US 0.72974651
## annual_domestic_RD_spending_bil_dollars 0.04350814
## gdp_per_capita_annual_growthrate_usd -0.00050209
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.00799909
## Std. Error z value
## (Intercept) 0.34888379 3.6065
## as.factor(area)US 0.35309408 2.0667
## annual_domestic_RD_spending_bil_dollars 0.01181452 3.6826
## gdp_per_capita_annual_growthrate_usd 0.01101955 -0.0456
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.01040192 -0.7690
## Pr(>|z|)
## (Intercept) 0.0003104 ***
## as.factor(area)US 0.0387606 *
## annual_domestic_RD_spending_bil_dollars 0.0002309 ***
## gdp_per_capita_annual_growthrate_usd 0.9636583
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.4418925
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.4)
## 2.5 %
## (Intercept) 0.57443309
## as.factor(area)US 0.03769483
## annual_domestic_RD_spending_bil_dollars 0.02035211
## gdp_per_capita_annual_growthrate_usd -0.02210000
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.02838648
## 97.5 %
## (Intercept) 1.94203240
## as.factor(area)US 1.42179819
## annual_domestic_RD_spending_bil_dollars 0.06666417
## gdp_per_capita_annual_growthrate_usd 0.02109583
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.01238830
mod.pois.od.4.metrics <- get_od_model_metrics(mod.pois.od.4, "Poisson")
Poisson Model 5 of Num of Annual Orphan Drug MAs
mod.pois.od.5 <- glm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004 + as.factor(area):annual_domestic_RD_spending_bil_dollars, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.od.5)
##
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd +
## as.factor(area):yearsafter2004 + as.factor(area):annual_domestic_RD_spending_bil_dollars,
## family = poisson(link = "log"), data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 1.587669 0.644130
## as.factor(area)US 0.954539 0.663174
## yearsafter2004 0.036717 0.054130
## annual_domestic_RD_spending_bil_dollars 0.016716 0.042740
## gdp_per_capita_annual_growthrate_usd 0.007273 0.007184
## as.factor(area)US:yearsafter2004 0.094297 0.058561
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.018953 0.043284
## z value Pr(>|z|)
## (Intercept) 2.465 0.0137 *
## as.factor(area)US 1.439 0.1501
## yearsafter2004 0.678 0.4976
## annual_domestic_RD_spending_bil_dollars 0.391 0.6957
## gdp_per_capita_annual_growthrate_usd 1.012 0.3114
## as.factor(area)US:yearsafter2004 1.610 0.1073
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.438 0.6615
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 759.207 on 35 degrees of freedom
## Residual deviance: 64.312 on 29 degrees of freedom
## AIC: 249.57
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.5, "5", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.od.5.vcov <- vcovPL(mod.pois.od.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.5 <- coeftest(mod.pois.od.5, vcov = mod.pois.od.5.vcov)
mod.pois.od.nw.5
##
## z test of coefficients:
##
## Estimate Std. Error
## (Intercept) 1.5876688 0.6905329
## as.factor(area)US 0.9545391 0.5669762
## yearsafter2004 0.0367170 0.0592109
## annual_domestic_RD_spending_bil_dollars 0.0167164 0.0454894
## gdp_per_capita_annual_growthrate_usd 0.0072732 0.0108530
## as.factor(area)US:yearsafter2004 0.0942970 0.0663960
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.0189534 0.0442075
## z value Pr(>|z|)
## (Intercept) 2.2992 0.02149 *
## as.factor(area)US 1.6836 0.09227 .
## yearsafter2004 0.6201 0.53519
## annual_domestic_RD_spending_bil_dollars 0.3675 0.71326
## gdp_per_capita_annual_growthrate_usd 0.6702 0.50276
## as.factor(area)US:yearsafter2004 1.4202 0.15554
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.4287 0.66811
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.5)
## 2.5 %
## (Intercept) 0.23424917
## as.factor(area)US -0.15671387
## yearsafter2004 -0.07933417
## annual_domestic_RD_spending_bil_dollars -0.07244128
## gdp_per_capita_annual_growthrate_usd -0.01399818
## as.factor(area)US:yearsafter2004 -0.03583679
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.10559845
## 97.5 %
## (Intercept) 2.94108845
## as.factor(area)US 2.06579208
## yearsafter2004 0.15276810
## annual_domestic_RD_spending_bil_dollars 0.10587408
## gdp_per_capita_annual_growthrate_usd 0.02854465
## as.factor(area)US:yearsafter2004 0.22443079
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.06769167
mod.pois.od.5.metrics <- get_od_model_metrics(mod.pois.od.5, "Poisson")
mod.od.pois.metrics <- rbind(mod.pois.od.1.metrics, mod.pois.od.2.metrics, mod.pois.od.3.metrics, mod.pois.od.4.metrics, mod.pois.od.5.metrics)
mod.od.pois.metrics
mod.od.pois.metrics <- rbind(mod.pois.od.1.metrics, mod.pois.od.2.metrics, mod.pois.od.3.metrics, mod.pois.od.4.metrics, mod.pois.od.5.metrics)
aic_values <- round(mod.od.pois.metrics$aic)
rmse_values <- round(mod.od.pois.metrics$rmse, 2)
logLik_vlues <- round(mod.od.pois.metrics$LogLik, 2)
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.pois.od.1
m2 <- mod.pois.od.2
m3 <- mod.pois.od.3
m4 <- mod.pois.od.4
m5 <- mod.pois.od.5
stargazer(m1, m2, m3, m4, m5, title="Poisson Models of Annual Orphan Drug Market Authorizations (Not adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/PoissonorphandrugMAmodelsNonAdjusted.htm",
covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)", "Years After 2004 (β<sup>̂</sup><sub>2</sub>)","Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)", "Years After 2004:US (β<sup>̂</sup><sub>5</sub>)", "Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)" , "Intercept (β<sup>̂</sup><sub>0</sub>)"),
dep.var.labels=c("Annual Number of Orphan Drug Market Authorizations"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Log Likelihood", logLik_vlues)))
##
## <table style="text-align:center"><caption><strong>Poisson Models of Annual Orphan Drug Market Authorizations (Not adjusted for Error Structure)</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Orphan Drug Market Authorizations</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>1.396<sup>***</sup></td><td>1.090<sup>***</sup></td><td>0.669<sup>***</sup></td><td>0.730<sup>**</sup></td><td>0.955</td></tr>
## <tr><td style="text-align:left"></td><td>(0.080)</td><td>(0.157)</td><td>(0.186)</td><td>(0.331)</td><td>(0.663)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>2</sub>)</td><td>0.110<sup>***</sup></td><td>0.077<sup>***</sup></td><td>0.057<sup>***</sup></td><td></td><td>0.037</td></tr>
## <tr><td style="text-align:left"></td><td>(0.007)</td><td>(0.016)</td><td>(0.014)</td><td></td><td>(0.054)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>0.012<sup>**</sup></td><td></td><td>0.044<sup>***</sup></td><td>0.017</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.005)</td><td></td><td>(0.011)</td><td>(0.043)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)</td><td></td><td>0.006</td><td>0.007</td><td>-0.001</td><td>0.007</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.007)</td><td>(0.007)</td><td>(0.007)</td><td>(0.007)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td>0.067<sup>***</sup></td><td></td><td>0.094</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.016)</td><td></td><td>(0.059)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td>-0.008</td><td>-0.019</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.011)</td><td>(0.043)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>1.288<sup>***</sup></td><td>1.303<sup>***</sup></td><td>1.834<sup>***</sup></td><td>1.258<sup>***</sup></td><td>1.588<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.105)</td><td>(0.103)</td><td>(0.161)</td><td>(0.307)</td><td>(0.644)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>261</td><td>258</td><td>246</td><td>282</td><td>250</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>8.03</td><td>8.41</td><td>7.57</td><td>10.52</td><td>7.5</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-127.33</td><td>-123.9</td><td>-117.92</td><td>-135.95</td><td>-117.78</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.pois.od.nw.1
m2 <- mod.pois.od.nw.2
m3 <- mod.pois.od.nw.3
m4 <- mod.pois.od.nw.4
m5 <- mod.pois.od.nw.5
stargazer(m1,m2,m3,m4,m5, title="Newey-West Adjusted Poisson Models of Orphan Drug Market Authorizations", align=TRUE, type = "html", out="Final Models Formatted/PoissonorphandrugMAmodels.htm",
covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)", "Years After 2004 (β<sup>̂</sup><sub>2</sub>)","Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)", "Years After 2004:US (β<sup>̂</sup><sub>5</sub>)", "Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)" , "Intercept (β<sup>̂</sup><sub>0</sub>)"),
dep.var.labels=c("Annual Number of Orphan Drug Market Authorizations"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Log Likelihood", logLik_vlues)))
##
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Poisson Models of Orphan Drug Market Authorizations</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Orphan Drug Market Authorizations</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>1.396<sup>***</sup></td><td>1.090<sup>***</sup></td><td>0.669<sup>***</sup></td><td>0.730<sup>**</sup></td><td>0.955<sup>*</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.152)</td><td>(0.218)</td><td>(0.192)</td><td>(0.353)</td><td>(0.567)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>2</sub>)</td><td>0.110<sup>***</sup></td><td>0.077<sup>***</sup></td><td>0.057<sup>***</sup></td><td></td><td>0.037</td></tr>
## <tr><td style="text-align:left"></td><td>(0.012)</td><td>(0.020)</td><td>(0.015)</td><td></td><td>(0.059)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>0.012<sup>**</sup></td><td></td><td>0.044<sup>***</sup></td><td>0.017</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.005)</td><td></td><td>(0.012)</td><td>(0.045)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)</td><td></td><td>0.006</td><td>0.007</td><td>-0.001</td><td>0.007</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.013)</td><td>(0.011)</td><td>(0.011)</td><td>(0.011)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td>0.067<sup>***</sup></td><td></td><td>0.094</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.015)</td><td></td><td>(0.066)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td>-0.008</td><td>-0.019</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.010)</td><td>(0.044)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>1.288<sup>***</sup></td><td>1.303<sup>***</sup></td><td>1.834<sup>***</sup></td><td>1.258<sup>***</sup></td><td>1.588<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.202)</td><td>(0.150)</td><td>(0.187)</td><td>(0.349)</td><td>(0.691)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>261</td><td>258</td><td>246</td><td>282</td><td>250</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>8.03</td><td>8.41</td><td>7.57</td><td>10.52</td><td>7.5</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-127.33</td><td>-123.9</td><td>-117.92</td><td>-135.95</td><td>-117.78</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
Linear Models of Number of Market Authorizations for NEW Orphan
Drugs
Plot Models of Number of Orphan Drug Market Authorizations for NEW
Orphan Drugs (Linear or Poisson Models)
plot_lin_pois_new_od <- function(model, model_number, model_type){
#Plot residuals and acf/ pacf of residuals by panel (EU/US)
combined_rd_ma_df$residuals <- resid(model)
par(mfrow =c(2,4))
us_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "US",]
eu_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "EU",]
plot(us_resid_data$year, us_resid_data$residuals, xlab="Year", main="US Residuals", ylab="Residuals")
acf(us_resid_data$residuals, main="ACF of US Residuals")
pacf(us_resid_data$residuals, main="PACF of US Residuals")
qqnorm(us_resid_data$residuals, main = "QQ Plot of US Residuals")
plot(eu_resid_data$year, eu_resid_data$residuals, xlab="Year", main="EU Residuals", ylab="Residuals")
acf(eu_resid_data$residuals, main="ACF of EU Residuals")
pacf(eu_resid_data$residuals, main="PACF of EU Residuals")
qqnorm(eu_resid_data$residuals, main = "QQ Plot of EU Residuals")
mtext(glue('{model_type} Model {model_number} of Annual MAs for New Orphan Drugs - Residual Plots'), side = 3, line = -1.1, outer = TRUE)
predicted <- predict(model , type = "response")
ggplot(combined_rd_ma_df, aes(x = year, y = freq_new_orphan_drug_mas, color = area, alpha="Observed")) +
geom_point() +
labs(x = "Year",
y = str_wrap("Number of Annual Market Authorizations for New Orphan Drugs", width = 40),
color = "Area",
alpha = "Line Type",
title=str_wrap(glue("{model_type} Model {model_number} of Annual Market Authorizations for New Orphan Drugs"), width = 60))+
scale_color_manual(name = "Color", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
geom_line(aes(x=year, y=predicted, alpha = "Fitted"))+
scale_alpha_manual(values = c(0.5, 1))+
theme_minimal()
# https://aosmith.rbind.io/2020/07/09/ggplot2-override-aes/
#Controlling legend appearance in ggplot2 with override.aes
#July 9, 2020 · @aosmith16
# https://www.tidyverse.org/blog/2024/02/ggplot2-3-5-0-legends/
#Teun van den Brand
#2024/02/26
}
Get Metrics of Models of Number of Market Authorizations for NEW
Orphan Drugs (Linear or Poisson Models)
get_new_od_model_metrics <- function(model, model_type){
#Get RMSE
predicted <- predict(model, type = "response")
actual <- combined_rd_ma_df$freq_new_orphan_drug_mas
model_rmse <- rmse(actual=actual, predicted=predicted)
model_df1 <- NA
model_df2 <- NA
model_adjRsq <- NA
model_fstat <- NA
model_pval <- NA
model_loglik <- NA
if(model_type == "Linear"){
model_df1 <- summary(model)$fstatistic[2]
model_df2 <- summary(model)$fstatistic[3]
model_fstat <- summary(model)$fstatistic[1]
model_pval <- pf(summary(model)$fstatistic[1], summary(model)$fstatistic[2], summary(model)$fstatistic[3], lower.tail = FALSE)
# https://www.reneshbedre.com/blog/get-f-stat-p-value-from-lm.html
# Renesh Bedre
# March 26, 2023
model_adjRsq <- summary(model)$adj.r.squared
}
if(model_type == "Poisson"){
model_loglik <- logLik(model)
}
model_aic <- AIC(model)
metrics_df <- data.frame("rmse" = model_rmse, "aic"= model_aic, "df1" = model_df1, "df2" = model_df2, "fstat" = model_fstat, "pval" = model_pval, "adjrsq" = model_adjRsq, "LogLik" = model_loglik)
rownames(metrics_df) <- NULL
return(metrics_df)
}
Linear Model 1 of Annual MAs for NEW orphan drugs
mod.new.od.1 <- lm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004, data=combined_rd_ma_df)
summary(mod.new.od.1)
##
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004,
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.4943 -3.5090 -0.7898 3.6053 13.5920
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.8772 2.3258 -0.377 0.708
## as.factor(area)US 15.5556 2.1492 7.238 2.66e-08 ***
## yearsafter2004 1.3581 0.2071 6.557 1.89e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.448 on 33 degrees of freedom
## Multiple R-squared: 0.7429, Adjusted R-squared: 0.7274
## F-statistic: 47.69 on 2 and 33 DF, p-value: 1.842e-10
plot_lin_pois_new_od(mod.new.od.1, "1", "Linear")


#Estimate covariance matrix with Newey West
mod.new.od.1.vcov <- vcovPL(mod.new.od.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.1 <-coeftest(mod.new.od.1, vcov = mod.new.od.1.vcov)
mod.new.od.nw.1
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.87719 2.55934 -0.3427 0.734
## as.factor(area)US 15.55556 3.22850 4.8182 3.149e-05 ***
## yearsafter2004 1.35810 0.13298 10.2128 9.530e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.1.metrics <- get_new_od_model_metrics(mod.new.od.1, "Linear")
Linear Model 2 of Annual MAs for NEW orphan drugs
mod.new.od.2 <- lm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.new.od.2)
##
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd,
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.1785 -3.2104 -0.2759 3.0949 10.9813
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.35122 2.61475 -2.429 0.02113 *
## as.factor(area)US 3.53369 4.21571 0.838 0.40832
## yearsafter2004 0.31111 0.38581 0.806 0.42616
## annual_domestic_RD_spending_bil_dollars 0.56444 0.17708 3.187 0.00327 **
## gdp_per_capita_annual_growthrate_usd 0.08705 0.16511 0.527 0.60181
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.679 on 31 degrees of freedom
## Multiple R-squared: 0.8127, Adjusted R-squared: 0.7885
## F-statistic: 33.63 on 4 and 31 DF, p-value: 7.211e-11
plot_lin_pois_new_od(mod.new.od.2, "2", "Linear")


#Estimate covariance matrix with Newey West
mod.new.od.2.vcov <- vcovPL(mod.new.od.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.2 <-coeftest(mod.new.od.2, vcov = mod.new.od.2.vcov)
mod.new.od.nw.2
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.351220 1.759759 -3.6091 0.0010681
## as.factor(area)US 3.533692 4.316055 0.8187 0.4191890
## yearsafter2004 0.311113 0.343208 0.9065 0.3716727
## annual_domestic_RD_spending_bil_dollars 0.564440 0.152241 3.7075 0.0008175
## gdp_per_capita_annual_growthrate_usd 0.087047 0.161052 0.5405 0.5927195
##
## (Intercept) **
## as.factor(area)US
## yearsafter2004
## annual_domestic_RD_spending_bil_dollars ***
## gdp_per_capita_annual_growthrate_usd
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.2.metrics <- get_new_od_model_metrics(mod.new.od.2, "Linear")
confint(mod.new.od.nw.2)
## 2.5 % 97.5 %
## (Intercept) -9.9402716 -2.7621681
## as.factor(area)US -5.2689605 12.3363440
## yearsafter2004 -0.3888657 1.0110909
## annual_domestic_RD_spending_bil_dollars 0.2539426 0.8749366
## gdp_per_capita_annual_growthrate_usd -0.2414197 0.4155144
Linear Model 3 of Annual MAs for NEW orphan drugs
mod.new.od.3 <- lm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004 , data=combined_rd_ma_df)
summary(mod.new.od.3)
##
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004,
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.4278 -2.9585 0.0282 3.2874 8.5813
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.83138 2.53528 1.906 0.066005 .
## as.factor(area)US 3.32700 3.36173 0.990 0.329998
## yearsafter2004 0.65404 0.24250 2.697 0.011208 *
## gdp_per_capita_annual_growthrate_usd 0.08599 0.15042 0.572 0.571687
## as.factor(area)US:yearsafter2004 1.43784 0.33899 4.242 0.000186 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.205 on 31 degrees of freedom
## Multiple R-squared: 0.8426, Adjusted R-squared: 0.8223
## F-statistic: 41.5 on 4 and 31 DF, p-value: 5.012e-12
plot_lin_pois_new_od(mod.new.od.3, "3", "Linear")


#Estimate covariance matrix with Newey West
mod.new.od.3.vcov <- vcovPL(mod.new.od.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.3 <-coeftest(mod.new.od.3, vcov = mod.new.od.3.vcov)
mod.new.od.nw.3
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.831381 1.439561 3.3561 0.002102 **
## as.factor(area)US 3.327002 2.407061 1.3822 0.176794
## yearsafter2004 0.654042 0.144359 4.5307 8.205e-05 ***
## gdp_per_capita_annual_growthrate_usd 0.085988 0.128458 0.6694 0.508206
## as.factor(area)US:yearsafter2004 1.437838 0.247706 5.8046 2.142e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.3.metrics <- get_new_od_model_metrics(mod.new.od.3, "Linear")
confint(mod.new.od.nw.3)
## 2.5 % 97.5 %
## (Intercept) 1.8953778 7.7673842
## as.factor(area)US -1.5822306 8.2362343
## yearsafter2004 0.3596208 0.9484642
## gdp_per_capita_annual_growthrate_usd -0.1760037 0.3479788
## as.factor(area)US:yearsafter2004 0.9326396 1.9430373
Linear Model 4 of Annual MAs for NEW orphan drugs
mod.new.od.4 <- lm(freq_new_orphan_drug_mas ~ as.factor(area) + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd + as.factor(area):annual_domestic_RD_spending_bil_dollars, data=combined_rd_ma_df)
summary(mod.new.od.4)
##
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + annual_domestic_RD_spending_bil_dollars +
## gdp_per_capita_annual_growthrate_usd + as.factor(area):annual_domestic_RD_spending_bil_dollars,
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.6424 -3.6096 -0.1338 4.1221 11.4456
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -2.30838 5.65061
## as.factor(area)US -4.97703 7.14154
## annual_domestic_RD_spending_bil_dollars 0.51651 0.21453
## gdp_per_capita_annual_growthrate_usd 0.02422 0.16353
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.20616 0.23451
## t value Pr(>|t|)
## (Intercept) -0.409 0.6857
## as.factor(area)US -0.697 0.4911
## annual_domestic_RD_spending_bil_dollars 2.408 0.0222 *
## gdp_per_capita_annual_growthrate_usd 0.148 0.8832
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.879 0.3861
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.668 on 31 degrees of freedom
## Multiple R-squared: 0.8134, Adjusted R-squared: 0.7893
## F-statistic: 33.79 on 4 and 31 DF, p-value: 6.797e-11
plot_lin_pois_new_od(mod.new.od.4, "4", "Linear")


#Estimate covariance matrix with Newey West
mod.new.od.4.vcov <- vcovPL(mod.new.od.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.4 <-coeftest(mod.new.od.4, vcov = mod.new.od.4.vcov)
mod.new.od.nw.4
##
## t test of coefficients:
##
## Estimate Std. Error
## (Intercept) -2.308381 3.267658
## as.factor(area)US -4.977025 4.041802
## annual_domestic_RD_spending_bil_dollars 0.516511 0.123020
## gdp_per_capita_annual_growthrate_usd 0.024225 0.133323
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.206155 0.100468
## t value Pr(>|t|)
## (Intercept) -0.7064 0.4851951
## as.factor(area)US -1.2314 0.2274382
## annual_domestic_RD_spending_bil_dollars 4.1986 0.0002097 ***
## gdp_per_capita_annual_growthrate_usd 0.1817 0.8570003
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 2.0520 0.0487012 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.4.metrics <- get_new_od_model_metrics(mod.new.od.4, "Linear")
confint(mod.new.od.nw.4)
## 2.5 %
## (Intercept) -8.972813840
## as.factor(area)US -13.220335718
## annual_domestic_RD_spending_bil_dollars 0.265609730
## gdp_per_capita_annual_growthrate_usd -0.247689347
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.001250445
## 97.5 %
## (Intercept) 4.3560519
## as.factor(area)US 3.2662853
## annual_domestic_RD_spending_bil_dollars 0.7674114
## gdp_per_capita_annual_growthrate_usd 0.2961392
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.4110603
Linear Model 5 of Annual MAs for NEW orphan drugs
mod.new.od.5 <- lm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004 + as.factor(area):annual_domestic_RD_spending_bil_dollars, data=combined_rd_ma_df)
summary(mod.new.od.5)
##
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd +
## as.factor(area):yearsafter2004 + as.factor(area):annual_domestic_RD_spending_bil_dollars,
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.9080 -3.0238 0.1602 3.5052 8.4363
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.75206 10.92260
## as.factor(area)US 2.88653 12.50519
## yearsafter2004 0.31846 0.87540
## annual_domestic_RD_spending_bil_dollars 0.27966 0.71115
## gdp_per_capita_annual_growthrate_usd 0.06997 0.15450
## as.factor(area)US:yearsafter2004 1.28754 1.08180
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.09153 0.74724
## t value Pr(>|t|)
## (Intercept) 0.069 0.946
## as.factor(area)US 0.231 0.819
## yearsafter2004 0.364 0.719
## annual_domestic_RD_spending_bil_dollars 0.393 0.697
## gdp_per_capita_annual_growthrate_usd 0.453 0.654
## as.factor(area)US:yearsafter2004 1.190 0.244
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.122 0.903
##
## Residual standard error: 5.307 on 29 degrees of freedom
## Multiple R-squared: 0.847, Adjusted R-squared: 0.8153
## F-statistic: 26.75 on 6 and 29 DF, p-value: 1.421e-10
plot_lin_pois_new_od(mod.new.od.5, "5", "Linear")


#Estimate covariance matrix with Newey West
mod.new.od.5.vcov <- vcovPL(mod.new.od.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.5 <-coeftest(mod.new.od.5, vcov = mod.new.od.5.vcov)
mod.new.od.nw.5
##
## t test of coefficients:
##
## Estimate Std. Error
## (Intercept) 0.752058 7.161922
## as.factor(area)US 2.886533 6.422452
## yearsafter2004 0.318456 0.554784
## annual_domestic_RD_spending_bil_dollars 0.279663 0.451301
## gdp_per_capita_annual_growthrate_usd 0.069973 0.135317
## as.factor(area)US:yearsafter2004 1.287536 0.720547
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.091526 0.448853
## t value Pr(>|t|)
## (Intercept) 0.1050 0.91709
## as.factor(area)US 0.4494 0.65645
## yearsafter2004 0.5740 0.57038
## annual_domestic_RD_spending_bil_dollars 0.6197 0.54031
## gdp_per_capita_annual_growthrate_usd 0.5171 0.60901
## as.factor(area)US:yearsafter2004 1.7869 0.08441 .
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.2039 0.83985
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.5.metrics <- get_new_od_model_metrics(mod.new.od.5, "Linear")
confint(mod.new.od.nw.5)
## 2.5 %
## (Intercept) -13.8957165
## as.factor(area)US -10.2488567
## yearsafter2004 -0.8162052
## annual_domestic_RD_spending_bil_dollars -0.6433509
## gdp_per_capita_annual_growthrate_usd -0.2067814
## as.factor(area)US:yearsafter2004 -0.1861481
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -1.0095335
## 97.5 %
## (Intercept) 15.3998321
## as.factor(area)US 16.0219220
## yearsafter2004 1.4531173
## annual_domestic_RD_spending_bil_dollars 1.2026759
## gdp_per_capita_annual_growthrate_usd 0.3467273
## as.factor(area)US:yearsafter2004 2.7612196
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.8264817
mod.new.od.metrics <- rbind(mod.new.od.1.metrics, mod.new.od.2.metrics, mod.new.od.3.metrics, mod.new.od.4.metrics, mod.new.od.5.metrics)
aic_values <- round(mod.new.od.metrics$aic)
rmse_values <- round(mod.new.od.metrics$rmse, 2)
fstat_values <- round(mod.new.od.metrics$fstat, 2)
p_values <- round(mod.new.od.metrics$pval, 4)
adjrsq_values <- round(mod.new.od.metrics$adjrsq, 2)
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.new.od.1
m2 <- mod.new.od.2
m3 <- mod.new.od.3
m4 <- mod.new.od.4
m5 <- mod.new.od.5
stargazer(m1, m2, m3, m4, m5, title="Linear Models of Market Authorizations for New Orphan Drugs (Not adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/NEWorphandrugMAmodelsNonAdjusted.htm",
covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)", "Years After 2004 (β<sup>̂</sup><sub>2</sub>)", "Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)", "Years After 2004:US (β<sup>̂</sup><sub>5</sub>)", "Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)", "Intercept (β<sup>̂</sup><sub>0</sub>)"),
dep.var.labels=c("Annual Number of Market Authorizations for New Orphan Drugs"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
##
## <table style="text-align:center"><caption><strong>Linear Models of Market Authorizations for New Orphan Drugs (Not adjusted for Error Structure)</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorizations for New Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>15.556<sup>***</sup></td><td>3.534</td><td>3.327</td><td>-4.977</td><td>2.887</td></tr>
## <tr><td style="text-align:left"></td><td>(2.149)</td><td>(4.216)</td><td>(3.362)</td><td>(7.142)</td><td>(12.505)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>2</sub>)</td><td>1.358<sup>***</sup></td><td>0.311</td><td>0.654<sup>**</sup></td><td></td><td>0.318</td></tr>
## <tr><td style="text-align:left"></td><td>(0.207)</td><td>(0.386)</td><td>(0.242)</td><td></td><td>(0.875)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>0.564<sup>***</sup></td><td></td><td>0.517<sup>**</sup></td><td>0.280</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.177)</td><td></td><td>(0.215)</td><td>(0.711)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)</td><td></td><td>0.087</td><td>0.086</td><td>0.024</td><td>0.070</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.165)</td><td>(0.150)</td><td>(0.164)</td><td>(0.155)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td>1.438<sup>***</sup></td><td></td><td>1.288</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.339)</td><td></td><td>(1.082)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td>0.206</td><td>-0.092</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.235)</td><td>(0.747)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>-0.877</td><td>-6.351<sup>**</sup></td><td>4.831<sup>*</sup></td><td>-2.308</td><td>0.752</td></tr>
## <tr><td style="text-align:left"></td><td>(2.326)</td><td>(2.615)</td><td>(2.535)</td><td>(5.651)</td><td>(10.923)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>241</td><td>234</td><td>228</td><td>234</td><td>231</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>6.17</td><td>5.27</td><td>4.83</td><td>5.26</td><td>4.76</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.73</td><td>0.79</td><td>0.82</td><td>0.79</td><td>0.82</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>47.69</td><td>33.63</td><td>41.5</td><td>33.79</td><td>26.75</td></tr>
## <tr><td style="text-align:left">p-value</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.new.od.nw.1
m2 <- mod.new.od.nw.2
m3 <- mod.new.od.nw.3
m4 <- mod.new.od.nw.4
m5 <- mod.new.od.nw.5
stargazer(m1, m2, m3, m4, m5, title="Newey-West Adjusted Linear Models of Market Authorizations for New Orphan Drugs", align=TRUE, type = "html", out="Final Models Formatted/NEWorphandrugMAmodels.htm",
covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)", "Years After 2004 (β<sup>̂</sup><sub>2</sub>)", "Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)", "Years After 2004:US (β<sup>̂</sup><sub>5</sub>)", "Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)", "Intercept (β<sup>̂</sup><sub>0</sub>)"),
dep.var.labels=c("Annual Number of Market Authorizations for New Orphan Drugs"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
##
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Linear Models of Market Authorizations for New Orphan Drugs</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorizations for New Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>15.556<sup>***</sup></td><td>3.534</td><td>3.327</td><td>-4.977</td><td>2.887</td></tr>
## <tr><td style="text-align:left"></td><td>(3.229)</td><td>(4.316)</td><td>(2.407)</td><td>(4.042)</td><td>(6.422)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>2</sub>)</td><td>1.358<sup>***</sup></td><td>0.311</td><td>0.654<sup>***</sup></td><td></td><td>0.318</td></tr>
## <tr><td style="text-align:left"></td><td>(0.133)</td><td>(0.343)</td><td>(0.144)</td><td></td><td>(0.555)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>0.564<sup>***</sup></td><td></td><td>0.517<sup>***</sup></td><td>0.280</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.152)</td><td></td><td>(0.123)</td><td>(0.451)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)</td><td></td><td>0.087</td><td>0.086</td><td>0.024</td><td>0.070</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.161)</td><td>(0.128)</td><td>(0.133)</td><td>(0.135)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td>1.438<sup>***</sup></td><td></td><td>1.288<sup>*</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.248)</td><td></td><td>(0.721)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td>0.206<sup>**</sup></td><td>-0.092</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.100)</td><td>(0.449)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>-0.877</td><td>-6.351<sup>***</sup></td><td>4.831<sup>***</sup></td><td>-2.308</td><td>0.752</td></tr>
## <tr><td style="text-align:left"></td><td>(2.559)</td><td>(1.760)</td><td>(1.440)</td><td>(3.268)</td><td>(7.162)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>241</td><td>234</td><td>228</td><td>234</td><td>231</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>6.17</td><td>5.27</td><td>4.83</td><td>5.26</td><td>4.76</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.73</td><td>0.79</td><td>0.82</td><td>0.79</td><td>0.82</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>47.69</td><td>33.63</td><td>41.5</td><td>33.79</td><td>26.75</td></tr>
## <tr><td style="text-align:left">p-value</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
Overall mean of Domestic R&D spending
mean(combined_rd_ma_df$annual_domestic_RD_spending_bil_dollars)
## [1] 35.61325
Overall mean of US Domestic R&D spending
mean(subset(combined_rd_ma_df ,area=="US")$annual_domestic_RD_spending_bil_dollars)
## [1] 46.25642
Overall mean of annual EU Domestic R&D spending
mean(subset(combined_rd_ma_df ,area=="EU")$annual_domestic_RD_spending_bil_dollars)
## [1] 24.97008
Overall mean of annual number of US Market Authroized Orphan
Drugs
mean(subset(combined_rd_ma_df ,area=="US")$freq_orphan_drug_mas)
## [1] 43.77778
mean(subset(combined_rd_ma_df ,area=="US")$freq_new_orphan_drug_mas)
## [1] 26.22222
Overall mean of annual number of EU Market Authroized Orphan
Drugs
mean(subset(combined_rd_ma_df ,area=="EU")$freq_orphan_drug_mas)
## [1] 10.83333
mean(subset(combined_rd_ma_df ,area=="EU")$freq_new_orphan_drug_mas)
## [1] 10.66667
Poisson Models for Number of Market Authorizations for NEW Orphan
Drugs
Poisson model 1 of Annual Num of MAs for NEW orphan drugs
mod.pois.new.od.1 <- glm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 , data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.new.od.1)
##
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004,
## family = poisson(link = "log"), data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.645620 0.109281 15.059 <2e-16 ***
## as.factor(area)US 0.899484 0.085598 10.508 <2e-16 ***
## yearsafter2004 0.075900 0.007829 9.695 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 267.814 on 35 degrees of freedom
## Residual deviance: 47.599 on 33 degrees of freedom
## AIC: 217.38
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.1, "1", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.new.od.1.vcov <- vcovPL(mod.pois.new.od.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.1 <- coeftest(mod.pois.new.od.1, vcov = mod.pois.new.od.1.vcov)
mod.pois.new.od.nw.1
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.6456196 0.1219637 13.493 < 2.2e-16 ***
## as.factor(area)US 0.8994836 0.0851551 10.563 < 2.2e-16 ***
## yearsafter2004 0.0758998 0.0068369 11.101 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.1.metrics <- get_new_od_model_metrics(mod.pois.new.od.1, "Poisson")
confint(mod.pois.new.od.nw.1)
## 2.5 % 97.5 %
## (Intercept) 1.40657514 1.8846640
## as.factor(area)US 0.73258262 1.0663846
## yearsafter2004 0.06249974 0.0892999
Poisson model 2 of Annual Num of MAs for NEW orphan drugs
mod.pois.new.od.2 <- glm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.new.od.2)
##
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd,
## family = poisson(link = "log"), data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.635504 0.110626 14.784 < 2e-16
## as.factor(area)US 0.846810 0.177685 4.766 1.88e-06
## yearsafter2004 0.070676 0.017502 4.038 5.39e-05
## annual_domestic_RD_spending_bil_dollars 0.002101 0.006431 0.327 0.744
## gdp_per_capita_annual_growthrate_usd 0.002571 0.008073 0.319 0.750
##
## (Intercept) ***
## as.factor(area)US ***
## yearsafter2004 ***
## annual_domestic_RD_spending_bil_dollars
## gdp_per_capita_annual_growthrate_usd
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 267.81 on 35 degrees of freedom
## Residual deviance: 47.34 on 31 degrees of freedom
## AIC: 221.12
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.2, "2", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.new.od.2.vcov <- vcovPL(mod.pois.new.od.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.2 <- coeftest(mod.pois.new.od.2, vcov = mod.pois.new.od.2.vcov)
mod.pois.new.od.nw.2
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.6355035 0.1076405 15.1941 < 2.2e-16
## as.factor(area)US 0.8468101 0.1606519 5.2711 1.356e-07
## yearsafter2004 0.0706765 0.0162214 4.3570 1.319e-05
## annual_domestic_RD_spending_bil_dollars 0.0021014 0.0050413 0.4168 0.6768
## gdp_per_capita_annual_growthrate_usd 0.0025715 0.0115537 0.2226 0.8239
##
## (Intercept) ***
## as.factor(area)US ***
## yearsafter2004 ***
## annual_domestic_RD_spending_bil_dollars
## gdp_per_capita_annual_growthrate_usd
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.2.metrics <- get_new_od_model_metrics(mod.pois.new.od.2, "Poisson")
confint(mod.pois.new.od.nw.2)
## 2.5 % 97.5 %
## (Intercept) 1.424532098 1.84647494
## as.factor(area)US 0.531938159 1.16168213
## yearsafter2004 0.038883219 0.10246977
## annual_domestic_RD_spending_bil_dollars -0.007779401 0.01198222
## gdp_per_capita_annual_growthrate_usd -0.020073322 0.02521630
Poisson model 3 of Annual Num of MAs for NEW orphan drugs
mod.pois.new.od.3 <- glm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.new.od.3)
##
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## gdp_per_capita_annual_growthrate_usd + as.factor(area):yearsafter2004,
## family = poisson(link = "log"), data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.804979 0.164882 10.947 < 2e-16 ***
## as.factor(area)US 0.663508 0.195491 3.394 0.000689 ***
## yearsafter2004 0.059871 0.014328 4.179 2.93e-05 ***
## gdp_per_capita_annual_growthrate_usd 0.001917 0.007922 0.242 0.808790
## as.factor(area)US:yearsafter2004 0.022662 0.017193 1.318 0.187464
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 267.814 on 35 degrees of freedom
## Residual deviance: 45.714 on 31 degrees of freedom
## AIC: 219.49
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.3, "3", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.new.od.3.vcov <- vcovPL(mod.pois.new.od.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.3 <- coeftest(mod.pois.new.od.3, vcov = mod.pois.new.od.3.vcov)
mod.pois.new.od.nw.3
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.804979 0.182915 9.8678 < 2.2e-16 ***
## as.factor(area)US 0.663508 0.178617 3.7147 0.0002035 ***
## yearsafter2004 0.059871 0.015031 3.9831 6.801e-05 ***
## gdp_per_capita_annual_growthrate_usd 0.001917 0.010548 0.1817 0.8557823
## as.factor(area)US:yearsafter2004 0.022662 0.013997 1.6191 0.1054195
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.3.metrics <- get_new_od_model_metrics(mod.pois.new.od.3, "Poisson")
confint(mod.pois.new.od.nw.3)
## 2.5 % 97.5 %
## (Intercept) 1.446471475 2.16348686
## as.factor(area)US 0.313425021 1.01359188
## yearsafter2004 0.030410385 0.08933097
## gdp_per_capita_annual_growthrate_usd -0.018756022 0.02259000
## as.factor(area)US:yearsafter2004 -0.004770509 0.05009488
Poisson model 4 of Annual Num of MAs for NEW orphan drugs
mod.pois.new.od.4 <- glm(freq_new_orphan_drug_mas ~ as.factor(area) + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd + as.factor(area):annual_domestic_RD_spending_bil_dollars, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.new.od.4)
##
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + annual_domestic_RD_spending_bil_dollars +
## gdp_per_capita_annual_growthrate_usd + as.factor(area):annual_domestic_RD_spending_bil_dollars,
## family = poisson(link = "log"), data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 1.180839 0.312220
## as.factor(area)US 0.905994 0.347318
## annual_domestic_RD_spending_bil_dollars 0.045914 0.011172
## gdp_per_capita_annual_growthrate_usd -0.001498 0.007761
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.021750 0.011578
## z value Pr(>|z|)
## (Intercept) 3.782 0.000156 ***
## as.factor(area)US 2.609 0.009093 **
## annual_domestic_RD_spending_bil_dollars 4.110 3.96e-05 ***
## gdp_per_capita_annual_growthrate_usd -0.193 0.846973
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -1.879 0.060301 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 267.814 on 35 degrees of freedom
## Residual deviance: 60.354 on 31 degrees of freedom
## AIC: 234.13
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.4, "4", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.new.od.4.vcov <- vcovPL(mod.pois.new.od.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.4 <- coeftest(mod.pois.new.od.4, vcov = mod.pois.new.od.4.vcov)
mod.pois.new.od.nw.4
##
## z test of coefficients:
##
## Estimate Std. Error
## (Intercept) 1.1808391 0.3401473
## as.factor(area)US 0.9059936 0.3466624
## annual_domestic_RD_spending_bil_dollars 0.0459142 0.0115817
## gdp_per_capita_annual_growthrate_usd -0.0014978 0.0104463
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.0217502 0.0105705
## z value Pr(>|z|)
## (Intercept) 3.4716 0.0005175 ***
## as.factor(area)US 2.6135 0.0089627 **
## annual_domestic_RD_spending_bil_dollars 3.9644 7.359e-05 ***
## gdp_per_capita_annual_growthrate_usd -0.1434 0.8859913
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -2.0576 0.0396262 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.4.metrics <- get_new_od_model_metrics(mod.pois.new.od.4, "Poisson")
confint(mod.pois.new.od.nw.4)
## 2.5 %
## (Intercept) 0.51416259
## as.factor(area)US 0.22654767
## annual_domestic_RD_spending_bil_dollars 0.02321443
## gdp_per_capita_annual_growthrate_usd -0.02197220
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.04246807
## 97.5 %
## (Intercept) 1.847515641
## as.factor(area)US 1.585439503
## annual_domestic_RD_spending_bil_dollars 0.068613875
## gdp_per_capita_annual_growthrate_usd 0.018976647
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.001032325
Poisson model 5 of Annual Num of MAs for NEW orphan drugs
mod.pois.new.od.5 <- glm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd
+ as.factor(area):yearsafter2004 + as.factor(area):annual_domestic_RD_spending_bil_dollars, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.new.od.5)
##
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## annual_domestic_RD_spending_bil_dollars + gdp_per_capita_annual_growthrate_usd +
## as.factor(area):yearsafter2004 + as.factor(area):annual_domestic_RD_spending_bil_dollars,
## family = poisson(link = "log"), data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 1.594788 0.649095
## as.factor(area)US 0.991965 0.680585
## yearsafter2004 0.042603 0.054937
## annual_domestic_RD_spending_bil_dollars 0.014167 0.043172
## gdp_per_capita_annual_growthrate_usd 0.003007 0.008077
## as.factor(area)US:yearsafter2004 0.057304 0.060991
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.020057 0.044019
## z value Pr(>|z|)
## (Intercept) 2.457 0.014 *
## as.factor(area)US 1.458 0.145
## yearsafter2004 0.775 0.438
## annual_domestic_RD_spending_bil_dollars 0.328 0.743
## gdp_per_capita_annual_growthrate_usd 0.372 0.710
## as.factor(area)US:yearsafter2004 0.940 0.347
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.456 0.649
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 267.814 on 35 degrees of freedom
## Residual deviance: 45.111 on 29 degrees of freedom
## AIC: 222.89
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.5, "5", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.new.od.5.vcov <- vcovPL(mod.pois.new.od.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.5 <- coeftest(mod.pois.new.od.5, vcov = mod.pois.new.od.5.vcov)
mod.pois.new.od.nw.5
##
## z test of coefficients:
##
## Estimate Std. Error
## (Intercept) 1.5947882 0.7374296
## as.factor(area)US 0.9919651 0.6904769
## yearsafter2004 0.0426032 0.0598991
## annual_domestic_RD_spending_bil_dollars 0.0141670 0.0472703
## gdp_per_capita_annual_growthrate_usd 0.0030073 0.0108619
## as.factor(area)US:yearsafter2004 0.0573035 0.0657810
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.0200566 0.0476424
## z value Pr(>|z|)
## (Intercept) 2.1626 0.03057 *
## as.factor(area)US 1.4366 0.15082
## yearsafter2004 0.7112 0.47693
## annual_domestic_RD_spending_bil_dollars 0.2997 0.76440
## gdp_per_capita_annual_growthrate_usd 0.2769 0.78188
## as.factor(area)US:yearsafter2004 0.8711 0.38369
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.4210 0.67377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.5.metrics <- get_new_od_model_metrics(mod.pois.new.od.5, "Poisson")
confint(mod.pois.new.od.nw.5)
## 2.5 %
## (Intercept) 0.14945274
## as.factor(area)US -0.36134473
## yearsafter2004 -0.07479700
## annual_domestic_RD_spending_bil_dollars -0.07848104
## gdp_per_capita_annual_growthrate_usd -0.01828168
## as.factor(area)US:yearsafter2004 -0.07162484
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars -0.11343401
## 97.5 %
## (Intercept) 3.04012362
## as.factor(area)US 2.34527483
## yearsafter2004 0.16000333
## annual_domestic_RD_spending_bil_dollars 0.10681501
## gdp_per_capita_annual_growthrate_usd 0.02429626
## as.factor(area)US:yearsafter2004 0.18623189
## as.factor(area)US:annual_domestic_RD_spending_bil_dollars 0.07332071
mod.pois.new.od.metrics <- rbind(mod.pois.new.od.1.metrics, mod.pois.new.od.2.metrics, mod.pois.new.od.3.metrics, mod.pois.new.od.4.metrics, mod.pois.new.od.5.metrics)
aic_values <- round(mod.pois.new.od.metrics$aic)
rmse_values <- round(mod.pois.new.od.metrics$rmse, 2)
loglik_values <- round(mod.pois.new.od.metrics$LogLik, 2)
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.pois.new.od.1
m2 <- mod.pois.new.od.2
m3 <- mod.pois.new.od.3
m4 <- mod.pois.new.od.4
m5 <- mod.pois.new.od.5
stargazer(m1, m2, m3, m4, m5, title="Poisson Models of Market Authorizations for New Orphan Drugs (Not adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/PoissonNEWorphandrugMAmodelsNonAdjusted.htm",
covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)" , "Years After 2004 (β<sup>̂</sup><sub>2</sub>)", "Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)", "Years After 2004:US (β<sup>̂</sup><sub>5</sub>)", "Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)" , "Intercept (β<sup>̂</sup><sub>0</sub>)"),
dep.var.labels=c("Annual Number of Market Authorizations for New Orphan Drugs"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Log Likelihood", loglik_values)))
##
## <table style="text-align:center"><caption><strong>Poisson Models of Market Authorizations for New Orphan Drugs (Not adjusted for Error Structure)</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorizations for New Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>0.899<sup>***</sup></td><td>0.847<sup>***</sup></td><td>0.664<sup>***</sup></td><td>0.906<sup>***</sup></td><td>0.992</td></tr>
## <tr><td style="text-align:left"></td><td>(0.086)</td><td>(0.178)</td><td>(0.195)</td><td>(0.347)</td><td>(0.681)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>2</sub>)</td><td>0.076<sup>***</sup></td><td>0.071<sup>***</sup></td><td>0.060<sup>***</sup></td><td></td><td>0.043</td></tr>
## <tr><td style="text-align:left"></td><td>(0.008)</td><td>(0.018)</td><td>(0.014)</td><td></td><td>(0.055)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>0.002</td><td></td><td>0.046<sup>***</sup></td><td>0.014</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.006)</td><td></td><td>(0.011)</td><td>(0.043)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)</td><td></td><td>0.003</td><td>0.002</td><td>-0.001</td><td>0.003</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.008)</td><td>(0.008)</td><td>(0.008)</td><td>(0.008)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td>0.023</td><td></td><td>0.057</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.017)</td><td></td><td>(0.061)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td>-0.022<sup>*</sup></td><td>-0.020</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.012)</td><td>(0.044)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>1.646<sup>***</sup></td><td>1.636<sup>***</sup></td><td>1.805<sup>***</sup></td><td>1.181<sup>***</sup></td><td>1.595<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.109)</td><td>(0.111)</td><td>(0.165)</td><td>(0.312)</td><td>(0.649)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>217</td><td>221</td><td>219</td><td>234</td><td>223</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>4.77</td><td>4.82</td><td>4.72</td><td>5.68</td><td>4.67</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-105.69</td><td>-105.56</td><td>-104.75</td><td>-112.07</td><td>-104.44</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.pois.new.od.nw.1
m2 <- mod.pois.new.od.nw.2
m3 <- mod.pois.new.od.nw.3
m4 <- mod.pois.new.od.nw.4
m5 <- mod.pois.new.od.nw.5
stargazer(m1,m2,m3,m4,m5, title="Newey-West Adjusted Poisson Models of Market Authorizations for New Orphan Drugs ", align=TRUE, type = "html", out="Final Models Formatted/PoissonNEWorphandrugMAmodels.htm",
covariate.labels = c("US (β<sup>̂</sup><sub>1</sub>)" , "Years After 2004 (β<sup>̂</sup><sub>2</sub>)", "Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)", "Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)", "Years After 2004:US (β<sup>̂</sup><sub>5</sub>)", "Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)" , "Intercept (β<sup>̂</sup><sub>0</sub>)"),
dep.var.labels=c("Annual Number of Market Authorizations for New Orphan Drugs"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Log Likelihood", loglik_values)))
##
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Poisson Models of Market Authorizations for New Orphan Drugs</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorizations for New Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US (β<sup>̂</sup><sub>1</sub>)</td><td>0.899<sup>***</sup></td><td>0.847<sup>***</sup></td><td>0.664<sup>***</sup></td><td>0.906<sup>***</sup></td><td>0.992</td></tr>
## <tr><td style="text-align:left"></td><td>(0.085)</td><td>(0.161)</td><td>(0.179)</td><td>(0.347)</td><td>(0.690)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004 (β<sup>̂</sup><sub>2</sub>)</td><td>0.076<sup>***</sup></td><td>0.071<sup>***</sup></td><td>0.060<sup>***</sup></td><td></td><td>0.043</td></tr>
## <tr><td style="text-align:left"></td><td>(0.007)</td><td>(0.016)</td><td>(0.015)</td><td></td><td>(0.060)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD) (β<sup>̂</sup><sub>3</sub>)</td><td></td><td>0.002</td><td></td><td>0.046<sup>***</sup></td><td>0.014</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.005)</td><td></td><td>(0.012)</td><td>(0.047)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita (β<sup>̂</sup><sub>4</sub>)</td><td></td><td>0.003</td><td>0.002</td><td>-0.001</td><td>0.003</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.012)</td><td>(0.011)</td><td>(0.010)</td><td>(0.011)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US (β<sup>̂</sup><sub>5</sub>)</td><td></td><td></td><td>0.023</td><td></td><td>0.057</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.014)</td><td></td><td>(0.066)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual Domestic R&D Spending (Billions USD):US (β<sup>̂</sup><sub>6</sub>)</td><td></td><td></td><td></td><td>-0.022<sup>**</sup></td><td>-0.020</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.011)</td><td>(0.048)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept (β<sup>̂</sup><sub>0</sub>)</td><td>1.646<sup>***</sup></td><td>1.636<sup>***</sup></td><td>1.805<sup>***</sup></td><td>1.181<sup>***</sup></td><td>1.595<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.122)</td><td>(0.108)</td><td>(0.183)</td><td>(0.340)</td><td>(0.737)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>217</td><td>221</td><td>219</td><td>234</td><td>223</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>4.77</td><td>4.82</td><td>4.72</td><td>5.68</td><td>4.67</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-105.69</td><td>-105.56</td><td>-104.75</td><td>-112.07</td><td>-104.44</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>